#include "multicoregarbage.h"
#include "multicoreruntime.h"
#include "runtime_arch.h"
#include "SimpleHash.h"
#include "GenericHashtable.h"
#include "ObjectHash.h"
#include "GCSharedHash.h"
extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
extern int numqueues[][NUMCLASSES];
extern struct genhashtable * activetasks;
extern struct taskparamdescriptor *currtpd;
extern struct LockValue runtime_locks[MAXTASKPARAMS];
extern int runtime_locklen;

extern struct global_defs_t * global_defs_p;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;

extern INTPTR bamboo_threadlocks;
struct pointerblock {
  void * ptrs[NUMPTRS];
  struct pointerblock *next;
};

struct pointerblock *gchead=NULL;
int gcheadindex=0;
struct pointerblock *gctail=NULL;
int gctailindex=0;
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
struct pointerblock *gcspare=NULL;
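
/* Commentary (added): the mark queue is a linked list of fixed-size
 * pointerblocks. gchead/gcheadindex form the enqueue cursor,
 * gctail/gctailindex the destructive dequeue cursor, and
 * gctail2/gctailindex2 an extra non-destructive scan cursor over the
 * same list. One emptied block is parked in gcspare so the common
 * enqueue/dequeue cycle avoids repeated RUNMALLOC_I/RUNFREE calls. */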
#define NUMLOBJPTRS 20

struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];
  int lengths[NUMLOBJPTRS];
  int hosts[NUMLOBJPTRS];
  struct lobjpointerblock *next;
  struct lobjpointerblock *prev;
};
struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;
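
/* Commentary (added): large objects are queued separately in doubly
 * linked lobjpointerblocks that carry three parallel arrays per entry:
 * the object's start address (lobjs), its length in bytes (lengths) and
 * its host core (hosts). The prev links enable the reverse traversal
 * used by gc_lobjdequeue3_I() when large objects are cached from the
 * heap top downwards. */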
#ifdef GC_CACHE_ADAPT
typedef struct gc_cache_revise_info {
  int orig_page_start_va;
  int orig_page_end_va;
  int orig_page_index;
  int to_page_start_va;
  int to_page_end_va;
  int to_page_index;
  unsigned int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;

gc_cache_revise_info_t gc_cache_revise_infomation;
#endif // GC_CACHE_ADAPT
// dump whole mem in blocks
inline void dumpSMem() {
  int block = 0;
  int sblock = 0;
  int j = 0;
  int i = 0;
  int coren = 0;
  int x = 0;
  int y = 0;
  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  // reserved blocks for sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  // remaining shared mem
  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
    advanceblock = false;
    // compute sblock #, block # and the core coordinate (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      // finished an sblock
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          // finished a large block
          block++;
          advanceblock = true;
        }
      } else {
        // finished a small block
        block++;
        advanceblock = true;
      }
      if(advanceblock) {
        coren = gc_block2core[block%(NUMCORES4GC*2)];
      }
      // compute core coordinate
      BAMBOO_COORDS(coren, &x, &y);
      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
             udn_tile_coord_x(), udn_tile_coord_y(),
             block, sblock++, x, y,
             (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    }
    j++;
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
} // void dumpSMem()
// should be invoked with interrupts disabled
inline void gc_enqueue_I(void *ptr) {
  GC_BAMBOO_DEBUGPRINT(0xe601);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  if (gcheadindex==NUMPTRS) {
    struct pointerblock * tmp;
    if (gcspare!=NULL) {
      // reuse the cached spare block
      tmp=gcspare;
      gcspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
    } // if (gcspare!=NULL)
    gchead->next=tmp;
    gchead=tmp;
    gcheadindex=0;
  } // if (gcheadindex==NUMPTRS)
  gchead->ptrs[gcheadindex++]=ptr;
  GC_BAMBOO_DEBUGPRINT(0xe602);
} // void gc_enqueue_I(void *ptr)
// dequeue and destroy the queue
inline void * gc_dequeue_I() {
  if (gctailindex==NUMPTRS) {
    struct pointerblock *tmp=gctail;
    gctail=gctail->next;
    gctailindex=0;
    if (gcspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gcspare=tmp;
    } // if (gcspare!=NULL)
  } // if (gctailindex==NUMPTRS)
  return gctail->ptrs[gctailindex++];
} // void * gc_dequeue_I()
// dequeue and do not destroy the queue
inline void * gc_dequeue2_I() {
  if (gctailindex2==NUMPTRS) {
    struct pointerblock *tmp=gctail2;
    gctail2=gctail2->next;
    gctailindex2=0;
  } // if (gctailindex2==NUMPTRS)
  return gctail2->ptrs[gctailindex2++];
} // void * gc_dequeue2_I()
inline int gc_moreItems_I() {
  if ((gchead==gctail)&&(gctailindex==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems_I()

inline int gc_moreItems2_I() {
  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems2_I()
// should be invoked with interrupts disabled
// enqueue a large obj: start addr & length
inline void gc_lobjenqueue_I(void *ptr,
                             int length,
                             int host) {
  GC_BAMBOO_DEBUGPRINT(0xe901);
  if (gclobjheadindex==NUMLOBJPTRS) {
    struct lobjpointerblock * tmp;
    if (gclobjspare!=NULL) {
      // reuse the cached spare block
      tmp=gclobjspare;
      gclobjspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
    } // if (gclobjspare!=NULL)
    gclobjhead->next=tmp;
    tmp->prev = gclobjhead;
    gclobjhead=tmp;
    gclobjheadindex=0;
  } // if (gclobjheadindex==NUMLOBJPTRS)
  gclobjhead->lobjs[gclobjheadindex]=ptr;
  gclobjhead->lengths[gclobjheadindex]=length;
  gclobjhead->hosts[gclobjheadindex++]=host;
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
} // void gc_lobjenqueue_I(void *ptr...)
// dequeue and destroy the queue
inline void * gc_lobjdequeue_I(int * length,
                               int * host) {
  if (gclobjtailindex==NUMLOBJPTRS) {
    struct lobjpointerblock *tmp=gclobjtail;
    gclobjtail=gclobjtail->next;
    gclobjtailindex=0;
    gclobjtail->prev = NULL;
    if (gclobjspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gclobjspare=tmp;
      tmp->next = NULL;
      tmp->prev = NULL;
    } // if (gclobjspare!=NULL)
  } // if (gclobjtailindex==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail->lengths[gclobjtailindex];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
  }
  return gclobjtail->lobjs[gclobjtailindex++];
} // void * gc_lobjdequeue_I()
inline int gc_lobjmoreItems_I() {
  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems_I()
// dequeue and don't destroy the queue
inline void gc_lobjdequeue2_I() {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=1;
  } else {
    gclobjtailindex2++;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
} // void gc_lobjdequeue2_I()

inline int gc_lobjmoreItems2_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems2_I()
// 'reversely' dequeue and don't destroy the queue
inline void gc_lobjdequeue3_I() {
  if (gclobjtailindex2==0) {
    gclobjtail2=gclobjtail2->prev;
    gclobjtailindex2=NUMLOBJPTRS-1;
  } else {
    gclobjtailindex2--;
  } // if (gclobjtailindex2==0)
} // void gc_lobjdequeue3_I()

inline int gc_lobjmoreItems3_I() {
  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems3_I()
inline void gc_lobjqueueinit4_I() {
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
} // void gc_lobjqueueinit4_I()

inline void * gc_lobjdequeue4_I(int * length,
                                int * host) {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=0;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail2->lengths[gclobjtailindex2];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
  }
  return gclobjtail2->lobjs[gclobjtailindex2++];
} // void * gc_lobjdequeue4_I()

inline int gc_lobjmoreItems4_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems4_I()
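
/* Commentary (added): the large-object queue supports four traversal
 * modes: gc_lobjdequeue_I() consumes the queue destructively;
 * gc_lobjdequeue2_I() walks it forward with the (gclobjtail2,
 * gclobjtailindex2) cursor without freeing blocks; gc_lobjdequeue3_I()
 * walks that same cursor backwards; and gc_lobjqueueinit4_I() together
 * with gc_lobjdequeue4_I() rewinds the cursor and replays the queue in
 * its current (sorted) order. */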
INTPTR gccurr_heapbound = 0;

inline void gettype_size(void * ptr,
                         int * ttype,
                         int * tsize) {
  int type = ((int *)ptr)[0];
  int size = 0;
  if(type < NUMCLASSES) {
    // a regular obj
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)ptr;
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  } // if(type < NUMCLASSES)
  *ttype = type;
  *tsize = size;
} // void gettype_size(void * ptr, int * ttype, int * tsize)
inline bool isLarge(void * ptr,
                    int * ttype,
                    int * tsize) {
  GC_BAMBOO_DEBUGPRINT(0xe701);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  // check if a pointer refers to a large object
  gettype_size(ptr, ttype, tsize);
  GC_BAMBOO_DEBUGPRINT(*tsize);
  int bound = (BAMBOO_SMEM_SIZE);
  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  if((((int)ptr-gcbaseva)%(bound))==0) {
    // ptr is the start of a block
    GC_BAMBOO_DEBUGPRINT(0xe702);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
    // it crosses the boundary of the current block
    GC_BAMBOO_DEBUGPRINT(0xe703);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  GC_BAMBOO_DEBUGPRINT(0);
  return false;
} // bool isLarge(void * ptr, int * ttype, int * tsize)
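
/* Commentary (added): an object is treated as "large" if it either
 * starts exactly on a block boundary or does not fit in the remainder
 * of its block. For example, if an object sits at offset
 * bound - 64 within its block but *tsize is 128, it would cross into
 * the next block, so it takes the large-object path instead of the
 * normal sliding-compaction path. */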
inline int hostcore(void * ptr) {
  // check the host core of ptr
  int host = 0;
  RESIDECORE(ptr, &host);
  GC_BAMBOO_DEBUGPRINT(0xedd0);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  GC_BAMBOO_DEBUGPRINT_REG(host);
  return host;
} // int hostcore(void * ptr)

inline void cpu2coords(int coren,
                       int * x,
                       int * y) {
  *x = bamboo_cpu2coords[2*coren];
  *y = bamboo_cpu2coords[2*coren+1];
} // void cpu2coords(...)

inline bool isLocal(void * ptr) {
  // check if a pointer is in the shared heap of this core
  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
} // bool isLocal(void * ptr)
inline bool gc_checkCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORES4GC; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORES4GC; ++i)
  return allStall;
} // bool gc_checkCoreStatus_I()

inline bool gc_checkAllCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORESACTIVE; ++i)
  return allStall;
} // bool gc_checkAllCoreStatus_I()
inline void checkMarkStatue() {
  GC_BAMBOO_DEBUGPRINT(0xee01);
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    GC_BAMBOO_DEBUGPRINT(0xee02);
    int entry_index = 0;
    if(waitconfirm) {
      // phase 2
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    GC_BAMBOO_DEBUGPRINT(0xee03);
    if(allStall) {
      GC_BAMBOO_DEBUGPRINT(0xee04);
      // ask for confirm
      if(!waitconfirm) {
        GC_BAMBOO_DEBUGPRINT(0xee05);
        // the first time all cores are found stalled
        // send out status confirm msgs to all other cores
        // reset the corestatus array too
        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        for(i = 1; i < NUMCORESACTIVE; ++i) {
          gccorestatus[i] = 1;
          // send mark phase finish confirm request msg to core i
          send_msg_1(i, GCMARKCONFIRM, false);
        } // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
        // check if the sums of sent objs and received objs are the same
        // yes->check if the info is the latest; no->go on executing
        int sumsendobj = 0;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee06);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee07);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        if(0 == sumsendobj) {
          // check if there are changes of the numsendobjs or numreceiveobjs
          // on any core
          bool ischanged = false;
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
              ischanged = true;
              break;
            }
          } // for(i = 0; i < NUMCORESACTIVE; ++i)
          GC_BAMBOO_DEBUGPRINT(0xee08);
          GC_BAMBOO_DEBUGPRINT_REG(ischanged);
          if(!ischanged) {
            GC_BAMBOO_DEBUGPRINT(0xee09);
            // all the core status info is the latest
            // the mark phase has finished, move to the compact phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(i = 0; i < NUMCORESACTIVE; ++i) {
              gccorestatus[i] = 1;
            } // for(i = 0; i < NUMCORESACTIVE; ++i)
          } else {
            waitconfirm = false;
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          }
        } else {
          // There were changes between phase 1 and phase 2, can not decide
          // whether the mark phase has finished
          waitconfirm = false;
          // As it failed in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } // if(0 == sumsendobj) else ...
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!gcwaitconfirm) else()
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    }
  } // if((!waitconfirm)...
  GC_BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
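
/* Commentary (added): mark termination uses a two-snapshot protocol.
 * gcnumsendobjs/gcnumreceiveobjs hold two rows selected by
 * gcnumsrobjs_index. When all cores first appear stalled, the
 * coordinator requests a confirmation round (GCMARKCONFIRM) into the
 * other row; the mark phase only ends when the send/receive sums cancel
 * and both rows agree, i.e. no marking messages were still in flight
 * between the two snapshots. */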
inline void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcloads[i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
      gcstopblock[i] = 0;
    } // for(i = 0; i < NUMCORES4GC; ++i)
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
    gcheaptop = 0;
    gctopcore = 0;
    gctopblock = 0;
#ifdef MAPPINGTBL_DEBUG
    // initialize the gcmappingtbl
    BAMBOO_MEMSET_WH(gcmappingtbl, 0, bamboo_rmsp_size);
#endif
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;

  // initialize the mark queue
  if (gchead==NULL) {
    gcheadindex=gctailindex=gctailindex2 = 0;
    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if (gclobjhead==NULL) {
    gclobjheadindex = 0;
    gclobjtailindex = 0;
    gclobjtailindex2 = 0;
    gclobjhead=gclobjtail=gclobjtail2=
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);

#ifdef GC_PROFILE
  gc_num_livespace = 0;
  gc_num_freespace = 0;
  gc_num_lobj = 0;
  gc_num_lobjspace = 0;
  gc_num_liveobj = 0;
  gc_num_forwardobj = 0;
  gc_num_profiles = NUMCORESACTIVE - 1;
#endif // GC_PROFILE
} // void initGC()
// compute load balance for all cores
inline int loadbalance(int * heaptop) {
  // compute load balance
  int i;

  // get the total loads
  int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  GC_BAMBOO_DEBUGPRINT(0xdddd);
  GC_BAMBOO_DEBUGPRINT_REG(tloads);
  GC_BAMBOO_DEBUGPRINT_REG(*heaptop);
  int b = 0;
  BLOCKINDEX(*heaptop, &b);
  int numbpc = b / NUMCORES4GC; // num of blocks per core
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(numbpc);
  gctopblock = b;
  RESIDECORE(heaptop, &gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  return numbpc;
} // int loadbalance(int * heaptop)
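
/* Commentary (added, with a small example): after marking, the live
 * bytes of all cores (gcloads) are summed, so the post-compaction heap
 * top is gcbaseva plus that sum. If, say, the top falls in block b = 8
 * with NUMCORES4GC = 4, each core will be responsible for
 * numbpc = 8/4 = 2 blocks of compacted data. */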
inline bool cacheLObjs() {
  // check the total mem size needed for large objs
  unsigned long long sumsize = 0;
  int size = 0;
  GC_BAMBOO_DEBUGPRINT(0xe801);
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  int tmp_lobj = 0;
  int tmp_len = 0;
  int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  // TODO USE QUICK SORT INSTEAD?
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
#ifdef GC_PROFILE
    gc_num_lobj++;
#endif
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    GC_BAMBOO_DEBUGPRINT_REG(tmp_len);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        } // if(tmp_block->lobjs[i-1] > tmp_lobj)
      } // if(i ==0 ) else {}
    } // while(true)
    // insert it
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  } // while(gc_lobjmoreItems2())

#ifdef GC_PROFILE
  gc_num_lobjspace = sumsize;
#endif
  // check if there is enough space to cache these large objs
  INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    GC_BAMBOO_DEBUGPRINT(0xe802);
    GC_BAMBOO_DEBUGPRINT_REG(dst);
    GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  GC_BAMBOO_DEBUGPRINT(0xe803);
  GC_BAMBOO_DEBUGPRINT_REG(dst);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the large objs at the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[BAMBOOMARKBIT]=COMPACTED;
    dst -= size;
    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    GC_BAMBOO_DEBUGPRINT(0x804);
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    GC_BAMBOO_DEBUGPRINT(dst);
    GC_BAMBOO_DEBUGPRINT_REG(size);
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  } // while(gc_lobjmoreItems3_I())
  return true;
} // bool cacheLObjs()
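
/* Commentary (added): cacheLObjs() evacuates all large objects to the
 * very top of the shared heap (sorted by address via the insertion sort
 * above) so that the compact phase can slide small objects freely;
 * moveLObjs() later copies them back down right above the compacted
 * data. The function fails (returns false) if the current heap top
 * already overlaps the region needed for the cache. */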
// update the bamboo_smemtbl to record current shared mem usage
void updateSmemTbl(int coren,
                   int localtop) {
  int ltopcore = 0;
  int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
    bound = BAMBOO_SMEM_SIZE;
  }
  int load = (localtop-gcbaseva)%bound;
  int i = 0;
  int j = 0;
  int toset = 0;
  do {
    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
    if(toset < ltopcore) {
      bamboo_smemtbl[toset]=
        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
    } else if(toset == ltopcore) {
      bamboo_smemtbl[toset] = load;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
      break;
    } else {
      break;
    }
    i++;
    if(i == 2) {
      i = 0;
      j++;
    }
  } while(true);
} // void updateSmemTbl(int, int)
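
/* Commentary (added): bamboo_smemtbl[toset] records the used bytes of
 * block toset. gc_core2block appears to interleave block ownership
 * across cores in pairs, so the loop walks core coren's blocks in
 * ownership order, marking every block below the local top as full
 * (BAMBOO_SMEM_SIZE_L for the first NUMCORES4GC blocks,
 * BAMBOO_SMEM_SIZE otherwise) and the topmost block as partially
 * used. */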
inline void moveLObjs() {
  GC_BAMBOO_DEBUGPRINT(0xea01);
#ifdef SMEMM
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
#endif
  // zero out the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // find current heap top
  // flush all gcloads to indicate the real heap top on each core
  // previously it represented the next available ptr on a core
  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1 ?
                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  GC_BAMBOO_DEBUGPRINT(0xea02);
  GC_BAMBOO_DEBUGPRINT_REG(gcloads[0]);
  GC_BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+i);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
    if((gcfilledblocks[i] > 0)
       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
  } // for(int i = 1; i < NUMCORES4GC; i++) {

  // find current heap top
  // TODO
  // a bug here: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory efficient
  int tmpheaptop = 0;
  int i = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
                 (BAMBOO_SMEM_SIZE_L*i) :
                 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }

  // move large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
#ifdef SMEMM
  gcmem_mixed_usedmem += tomove;
#endif
  GC_BAMBOO_DEBUGPRINT(0xea03);
  GC_BAMBOO_DEBUGPRINT_REG(tomove);
  GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
  // flush the sbstartbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks it crosses
    int remain = tmpheaptop-gcbaseva;
    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; //number of the sblock
    int b = 0; // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // check the remaining space in this block
    int bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;

    GC_BAMBOO_DEBUGPRINT(0xea04);
    int size = 0;
    int isize = 0;
    int host = 0;
    int ptr = 0;
    int base = tmpheaptop;
    int cpysize = 0;
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain < isize) {
        // this object crosses blocks
        if(cpysize > 0) {
          // close current block, fill its header
          BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
          *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
          bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;//add the size of header
          cpysize = 0;
          base = tmpheaptop;
          if(remain == 0) {
            remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                     BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          }
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
          BLOCKINDEX(tmpheaptop, &b);
          sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
        } // if(cpysize > 0)

        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea05);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        GC_BAMBOO_DEBUGPRINT_REG(base);
        gcheaptop += size;
        // cache the mapping info
        gcmappingtbl[OBJMAPPINGINDEX((int)ptr)] = (INTPTR)tmpheaptop;
        GC_BAMBOO_DEBUGPRINT(0xcdca);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        tmpheaptop += isize;

        // set the gcsbstarttbl and bamboo_smemtbl
        int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          gcsbstarttbl[sb] = (INTPTR)(-1);
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...

        // close current block and fill the header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
        cpysize = 0;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        } else {
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
        }
      } else {
        remain -= isize;
        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea06);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        gcheaptop += size;
        cpysize += isize;
        // cache the mapping info
        gcmappingtbl[OBJMAPPINGINDEX((int)ptr)] = (INTPTR)tmpheaptop;
        GC_BAMBOO_DEBUGPRINT(0xcdcc);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
        tmpheaptop += isize;

        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } // if(remain < isize) else ...
    } // while(gc_lobjmoreItems())
    if(cpysize > 0) {
      // close current block, fill the header
      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;//add the size of the header
    } else {
      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
    }
    gcheaptop = tmpheaptop;
  } // if(tomove == 0) else ...

  GC_BAMBOO_DEBUGPRINT(0xea07);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  bamboo_free_block = 0;
  int tbound = 0;
  do {
    tbound = (bamboo_free_block<NUMCORES4GC) ?
             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  } while(true);

#ifdef GC_PROFILE
  // check how much live space there is
  gc_num_livespace = 0;
  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
    gc_num_livespace += bamboo_smemtbl[tmpi];
  }
  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
#endif
  GC_BAMBOO_DEBUGPRINT(0xea08);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
} // void moveLObjs()
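
/* Commentary (added): moveLObjs() runs on the coordinator after
 * compaction: it (1) normalizes gcloads[] so each entry is a real heap
 * top, (2) rebuilds bamboo_smemtbl via updateSmemTbl(), (3) copies the
 * large objects cached at the heap top back down right above the
 * compacted data, splitting them across blocks when needed and
 * recording each move in gcmappingtbl, and (4) recomputes
 * bamboo_free_block, the first block with free space. */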
inline void markObj(void * objptr) {
  if(objptr == NULL) {
    return;
  }
  if(ISSHAREDOBJ(objptr)) {
    int host = hostcore(objptr);
    if(BAMBOO_NUM_OF_CORE == host) {
      // on this core
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(((int *)objptr)[BAMBOOMARKBIT] == INIT) {
        // this is the first time that this object is discovered,
        // set the flag as DISCOVERED
        ((int *)objptr)[BAMBOOMARKBIT] |= DISCOVERED;
        BAMBOO_CACHE_FLUSH_LINE(objptr);
        gc_enqueue_I(objptr);
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      GC_BAMBOO_DEBUGPRINT(0xbbbb);
      GC_BAMBOO_DEBUGPRINT_REG(host);
      GC_BAMBOO_DEBUGPRINT_REG(objptr);
      // check if this obj has been forwarded
      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
        // send a msg to the host informing that objptr is active
        send_msg_2(host, GCMARKEDOBJ, objptr, false);
#ifdef GC_PROFILE
        gc_num_forwardobj++;
#endif // GC_PROFILE
        gcself_numsendobjs++;
        MGCHashadd(gcforwardobjtbl, (int)objptr);
      }
    }
  } else {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gc_enqueue_I(objptr);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // if(ISSHAREDOBJ(objptr))
} // void markObj(void * objptr)
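
/* Commentary (added): marking is distributed by ownership. An object
 * hosted on this core is flagged DISCOVERED and pushed onto the local
 * mark queue; an object hosted elsewhere triggers a GCMARKEDOBJ message
 * to its host core, deduplicated through gcforwardobjtbl so each remote
 * reference is forwarded at most once. Non-shared (core-local) objects
 * are simply enqueued for scanning. */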
// enqueue root objs
inline void tomark(struct garbagelist * stackptr) {
  if(MARKPHASE != gcphase) {
    GC_BAMBOO_DEBUGPRINT_REG(gcphase);
    BAMBOO_EXIT(0xb001);
  }
  gcbusystatus = true;

  int i,j;
  // enqueue current stack
  while(stackptr!=NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe501);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        markObj(stackptr->array[i]);
      }
    }
    stackptr=stackptr->next;
  }
  GC_BAMBOO_DEBUGPRINT(0xe502);

  // enqueue static pointers global_defs_p
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
    while(staticptr != NULL) {
      for(i=0; i<staticptr->size; i++) {
        if(staticptr->array[i] != NULL) {
          markObj(staticptr->array[i]);
        }
      }
      staticptr = staticptr->next;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe503);

  // enqueue objectsets
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          markObj((void *)ptr->key);
          ptr=ptr->lnext;
        }
      }
    }
  }

  // enqueue current task descriptor
  if(currtpd != NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe504);
    for(i=0; i<currtpd->numParameters; i++) {
      markObj(currtpd->parameterArray[i]);
    }
  }

  GC_BAMBOO_DEBUGPRINT(0xe505);
  // enqueue active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      int i;
      for(i=0; i<tpd->numParameters; i++) {
        markObj(tpd->parameterArray[i]);
      }
      ptr=ptr->inext;
    }
  }

  GC_BAMBOO_DEBUGPRINT(0xe506);
  // enqueue cached transferred objs
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    markObj(objInfo->objptr);
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  GC_BAMBOO_DEBUGPRINT(0xe507);
  // enqueue cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    markObj(totransobj->objptr);
    item = getNextQueueItem(item);
  } // while(item != NULL)

  GC_BAMBOO_DEBUGPRINT(0xe508);
  // enqueue lock related info
  for(i = 0; i < runtime_locklen; ++i) {
    markObj((void *)(runtime_locks[i].redirectlock));
    if(runtime_locks[i].value != NULL) {
      markObj((void *)(runtime_locks[i].value));
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe509);

  // enqueue the global thread queue
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
    if(thread_counter > 0) {
      unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
      for(i = thread_counter; i > 0; i--) {
        markObj((void *)bamboo_thread_queue[4+start]);
        start = (start+1)&bamboo_max_thread_num_mask;
      }
    }
  }

  // enqueue the bamboo_threadlocks
  if(bamboo_threadlocks != 0) {
    markObj((void *)bamboo_threadlocks);
  }
  GC_BAMBOO_DEBUGPRINT(0xe50a);
} // void tomark(struct garbagelist * stackptr)
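
/* Commentary (added): the root set enqueued above consists of the
 * current stack(s), the static pointers in global_defs_p (STARTUPCORE
 * only), the task object sets and parameter queues, the current and
 * active task descriptors, cached inbound and outbound transfer
 * objects, runtime lock values, the global thread queue and
 * bamboo_threadlocks. */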
inline void mark(bool isfirst,
                 struct garbagelist * stackptr) {
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed01);
  if(isfirst) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed02);
    // enqueue root objs
    tomark(stackptr);
    gccurr_heaptop = 0; // record the size of all active objs in this core
                        // aligned but does not consider block boundaries
    gcmarkedptrbound = 0;
  }
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed03);

  bool checkfield = true;
  bool sendStall = false;

  // mark phase
  while(MARKPHASE == gcphase) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed04);
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      bool hasItems = gc_moreItems2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xed05);
      if(!hasItems) {
        break;
      }
      sendStall = false;
      gcbusystatus = true;
      checkfield = true;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      void * ptr = gc_dequeue2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT_REG(ptr);
      int size = 0;
      int isize = 0;
      int type = 0;
      // check if it is a shared obj
      if(ISSHAREDOBJ(ptr)) {
        // a shared obj, check if it is a local obj on this core
        int host = hostcore(ptr);
        bool islocal = (host == BAMBOO_NUM_OF_CORE);
        if(islocal) {
          bool isnotmarked = ((((int *)ptr)[BAMBOOMARKBIT] & DISCOVERED) != 0);
          if(isLarge(ptr, &type, &size) && isnotmarked) {
            // ptr is a large object and not marked or enqueued
            GC_BAMBOO_DEBUGPRINT(0xecec);
            GC_BAMBOO_DEBUGPRINT_REG(ptr);
            GC_BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
            BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
            gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
            BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
            // mark this obj
            ((int *)ptr)[BAMBOOMARKBIT] =
              ((int *)ptr)[BAMBOOMARKBIT] & (~DISCOVERED) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);
          } else if(isnotmarked) {
            // ptr is an unmarked active object on this core
            ALIGNSIZE(size, &isize);
            gccurr_heaptop += isize;
            GC_BAMBOO_DEBUGPRINT(0xaaaa);
            GC_BAMBOO_DEBUGPRINT_REG(ptr);
            GC_BAMBOO_DEBUGPRINT_REG(isize);
            GC_BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
            // mark this obj
            ((int *)ptr)[BAMBOOMARKBIT] =
              ((int *)ptr)[BAMBOOMARKBIT] & (~DISCOVERED) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);

            if(ptr + size > gcmarkedptrbound) {
              gcmarkedptrbound = ptr + size;
            } // if(ptr + size > gcmarkedptrbound)
          } else {
            // ptr is not an active obj or has been marked
            checkfield = false;
          } // if(isLarge(ptr, &type, &size)) else ...
        } /* can never reach here
        else {
          // check if this obj has been forwarded
          if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
            // send a msg to the host informing that ptr is active
            send_msg_2(host, GCMARKEDOBJ, ptr, false);
            gcself_numsendobjs++;
            MGCHashadd(gcforwardobjtbl, (int)ptr);
          }
          checkfield = false;
        }// if(isLocal(ptr)) else ...*/
      } // if(ISSHAREDOBJ(ptr))
      GC_BAMBOO_DEBUGPRINT(0xed06);

      if(checkfield) {
        // scan all pointers in ptr
        unsigned INTPTR * pointer;
        pointer=pointerarray[type];
        if (pointer==0) {
          /* Array of primitives */
          /* Do nothing */
        } else if (((INTPTR)pointer)==1) {
          /* Array of pointers */
          struct ArrayObject *ao=(struct ArrayObject *) ptr;
          int length=ao->___length___;
          int j;
          for(j=0; j<length; j++) {
            void * objptr =
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
            markObj(objptr);
          }
        } else {
          INTPTR size=pointer[0];
          int i;
          for(i=1; i<=size; i++) {
            unsigned int offset=pointer[i];
            void * objptr=*((void **)(((char *)ptr)+offset));
            markObj(objptr);
          }
        } // if (pointer==0) else if ... else ...
      } // if(checkfield)
    } // while(gc_moreItems2())
    GC_BAMBOO_DEBUGPRINT(0xed07);
    gcbusystatus = false;
    // send mark finish msg to the core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
      GC_BAMBOO_DEBUGPRINT(0xed08);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
      gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
      gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
        gcself_numreceiveobjs;
      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
    } else {
      if(!sendStall) {
        GC_BAMBOO_DEBUGPRINT(0xed09);
        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
                   gcself_numsendobjs, gcself_numreceiveobjs, false);
        sendStall = true;
      }
    } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
    GC_BAMBOO_DEBUGPRINT(0xed0a);

    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
      GC_BAMBOO_DEBUGPRINT(0xed0b);
      checkMarkStatue();
    }
  } // while(MARKPHASE == gcphase)
} // void mark(bool isfirst, struct garbagelist * stackptr)
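
/* Commentary (added): pointerarray[type] encodes an object's layout:
 * 0 means an array of primitives (nothing to scan), 1 means an array of
 * pointers (scan every element after the length word), and any other
 * value points to a table whose first word is the number of pointer
 * fields followed by their byte offsets within the object. */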
inline void compact2Heaptophelper_I(int coren,
                                    int* p,
                                    int* numblocks,
                                    int* remain) {
  int b;
  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
  if(STARTUPCORE == coren) {
    gctomove = true;
    gcmovestartaddr = *p;
    gcdstcore = gctopcore;
    gcblock2fill = *numblocks + 1;
  } else {
    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
  }
  GC_BAMBOO_DEBUGPRINT_REG(coren);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(*p);
  GC_BAMBOO_DEBUGPRINT_REG(*numblocks+1);
  if(memneed < *remain) {
    GC_BAMBOO_DEBUGPRINT(0xd104);
    *p = *p + memneed;
    gcrequiredmems[coren] = 0;
    gcloads[gctopcore] += memneed;
    *remain = *remain - memneed;
  } else {
    GC_BAMBOO_DEBUGPRINT(0xd105);
    // next available block
    *p += *remain;
    gcfilledblocks[gctopcore] += 1;
    int newbase = 0;
    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
    gcloads[gctopcore] = newbase;
    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
    gcstopblock[gctopcore]++;
    gctopcore = NEXTTOPCORE(gctopblock);
    gctopblock++;
    *numblocks = gcstopblock[gctopcore];
    *p = gcloads[gctopcore];
    BLOCKINDEX(*p, &b);
    *remain=(b<NUMCORES4GC) ?
             ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
           : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
    GC_BAMBOO_DEBUGPRINT(0xd106);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    GC_BAMBOO_DEBUGPRINT_REG(*p);
    GC_BAMBOO_DEBUGPRINT_REG(b);
    GC_BAMBOO_DEBUGPRINT_REG(*remain);
  } // if(memneed < remain)
  gcmovepending--;
} // void compact2Heaptophelper_I(int, int*, int*, int*)
inline void compact2Heaptop() {
  // no cores with spare mem and some cores are blocked with pending moves
  // find the current heap top and let the blocked cores move to the heap top
  int p;
  int numblocks = gcfilledblocks[gctopcore];
  p = gcloads[gctopcore];
  int b;
  BLOCKINDEX(p, &b);
  int remain = (b<NUMCORES4GC) ?
               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
             : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
  // check if the top core has finished
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  if(gccorestatus[gctopcore] != 0) {
    GC_BAMBOO_DEBUGPRINT(0xd101);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    // let the top core finish its own work first
    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    return;
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

  GC_BAMBOO_DEBUGPRINT(0xd102);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(p);
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(remain);
  for(int i = 0; i < NUMCORES4GC; i++) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
      GC_BAMBOO_DEBUGPRINT(0xd103);
      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
      if(gccorestatus[gctopcore] != 0) {
        GC_BAMBOO_DEBUGPRINT(0xd101);
        GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        // the top core is not free now
        return;
      }
    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xd106);
} // void compact2Heaptop()
inline void resolvePendingMoveRequest() {
  GC_BAMBOO_DEBUGPRINT(0xeb01);
  GC_BAMBOO_DEBUGPRINT(0xeeee);
  for(int k = 0; k < NUMCORES4GC; k++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+k);
    GC_BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
  }
  GC_BAMBOO_DEBUGPRINT(0xffff);
  int i;
  int j;
  bool nosparemem = true;
  bool haspending = false;
  bool hasrunning = false;
  bool noblock = false;
  int dstcore = 0;    // the core that needs spare mem
  int sourcecore = 0; // the core that has spare mem
  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
    if(nosparemem) {
      // check if there are cores with spare mem
      if(gccorestatus[i] == 0) {
        // finished working, check if it still has spare mem
        if(gcfilledblocks[i] < gcstopblock[i]) {
          // still have spare mem
          nosparemem = false;
          sourcecore = i;
        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
      }
      i++;
    } // if(nosparemem)
    if(!haspending) {
      if(gccorestatus[j] != 0) {
        // not finished, check if it has pending move requests
        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
          dstcore = j;
          haspending = true;
        } else {
          hasrunning = true;
        } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
      } // if(gccorestatus[j] != 0) else ...
      j++;
    } // if(!haspending)
    if(!nosparemem && haspending) {
      // match the dst core with the source core
      int tomove = 0;
      int startaddr = 0;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
                                                 gcrequiredmems[dstcore],
                                                 &tomove,
                                                 &startaddr);
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xeb02);
      GC_BAMBOO_DEBUGPRINT_REG(sourcecore);
      GC_BAMBOO_DEBUGPRINT_REG(dstcore);
      GC_BAMBOO_DEBUGPRINT_REG(startaddr);
      GC_BAMBOO_DEBUGPRINT_REG(tomove);
      if(STARTUPCORE == dstcore) {
        GC_BAMBOO_DEBUGPRINT(0xeb03);
        gcdstcore = sourcecore;
        gctomove = true;
        gcmovestartaddr = startaddr;
        gcblock2fill = tomove;
      } else {
        GC_BAMBOO_DEBUGPRINT(0xeb04);
        send_msg_4(dstcore, GCMOVESTART, sourcecore,
                   startaddr, tomove, false);
      }
      gcmovepending--;
      nosparemem = true;
      haspending = false;
      noblock = true;
    }
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xcccc);
  GC_BAMBOO_DEBUGPRINT_REG(hasrunning);
  GC_BAMBOO_DEBUGPRINT_REG(haspending);
  GC_BAMBOO_DEBUGPRINT_REG(noblock);

  if(!hasrunning && !noblock) {
    gcphase = SUBTLECOMPACTPHASE;
    compact2Heaptop();
  }
} // void resolvePendingMoveRequest()
struct moveHelper {
  int numblocks;   // block num for heap
  INTPTR base;     // base virtual address of current heap block
  INTPTR ptr;      // virtual address of current heap top
  int offset;      // offset in current heap block
  int blockbase;   // virtual address of current small block to check
  int blockbound;  // bound virtual address of current small block
  int sblockindex; // index of the small blocks
  int top;         // real size of current heap block to check
  int bound;       // bound size of current heap block to check
}; // struct moveHelper
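
/* Commentary (added): compaction uses two moveHelper cursors over the
 * same core's blocks: "orig" walks the blocks being scanned (tracking
 * the current small block via blockbase/blockbound/sblockindex) while
 * "to" tracks the destination where marked objects are slid down; its
 * top/bound fields detect when the destination block is full. */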
// If out of the boundary of valid shared memory, return false; else return true
inline bool nextSBlock(struct moveHelper * orig) {
  orig->blockbase = orig->blockbound;
outernextSBlock:
  ; // re-read the cursor state on every iteration
  bool sbchanged = false;
  INTPTR origptr = orig->ptr;
  int blockbase = orig->blockbase;
  int blockbound = orig->blockbound;
  int bound = orig->bound;
  GC_BAMBOO_DEBUGPRINT(0xecc0);
  GC_BAMBOO_DEBUGPRINT_REG(blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(bound);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);

  // check if across a big block
  // TODO now do not zero out the whole memory, maybe the last two conditions
  // are useless now
  if((blockbase>=bound)||(origptr>=bound)
     ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
innernextSBlock:
    // end of current heap block, jump to next one
    orig->numblocks++;
    GC_BAMBOO_DEBUGPRINT(0xecc1);
    GC_BAMBOO_DEBUGPRINT_REG(orig->numblocks);
    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
    GC_BAMBOO_DEBUGPRINT(orig->base);
    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
      // out of boundary
      orig->ptr = orig->base; // set current ptr to out of boundary too
      return false;
    }
    orig->blockbase = orig->base;
    orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
    sbchanged = true;
    int blocknum = 0;
    BLOCKINDEX(orig->base, &blocknum);
    if(bamboo_smemtbl[blocknum] == 0) {
      // goto the next block
      goto innernextSBlock;
    }
    // check the bamboo_smemtbl to decide the real bound
    orig->bound = orig->base + bamboo_smemtbl[blocknum];
  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
    orig->sblockindex += 1;
    sbchanged = true;
  } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...

  // check if this sblock should be skipped or has a special start point
  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    // goto the next sblock
    GC_BAMBOO_DEBUGPRINT(0xecc2);
    orig->sblockindex += 1;
    orig->blockbase += BAMBOO_SMEM_SIZE;
    goto outernextSBlock;
  } else if((sbstart != 0) && (sbchanged)) {
    // the first time this SBlock is accessed
    GC_BAMBOO_DEBUGPRINT(0xecc3);
    // does not start from the very beginning
    orig->blockbase = sbstart;
  } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...

  // setup information for this sblock
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xecc4);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->offset);
  if(orig->ptr >= orig->bound) {
    // met a lobj, move to the next block
    goto innernextSBlock;
  }

  return true;
} // bool nextSBlock(struct moveHelper * orig)
// return false if there is no available data to compact
inline bool initOrig_Dst(struct moveHelper * orig,
                         struct moveHelper * to) {
  // init the dst ptr
  to->numblocks = 0;
  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->bound = BAMBOO_SMEM_SIZE_L;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));

  GC_BAMBOO_DEBUGPRINT(0xef01);
  GC_BAMBOO_DEBUGPRINT_REG(to->base);
  INTPTR tobase = to->base;
  to->ptr = tobase + to->offset;
#ifdef GC_CACHE_ADAPT
  // initialize the gc_cache_revise_information
  gc_cache_revise_infomation.to_page_start_va = to->ptr;
  int toindex = (tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
    (toindex+1)+gcbaseva;
  gc_cache_revise_infomation.to_page_index = toindex;
  gc_cache_revise_infomation.orig_page_start_va = -1;
#endif // GC_CACHE_ADAPT

  // init the orig ptr
  orig->numblocks = 0;
  orig->base = tobase;
  int blocknum = 0;
  BLOCKINDEX(orig->base, &blocknum);
  INTPTR origbase = orig->base;
  // check the bamboo_smemtbl to decide the real bound
  orig->bound = origbase + bamboo_smemtbl[blocknum];
  orig->blockbase = origbase;
  orig->sblockindex = (origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
  GC_BAMBOO_DEBUGPRINT(0xef02);
  GC_BAMBOO_DEBUGPRINT_REG(origbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);

  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    GC_BAMBOO_DEBUGPRINT(0xef03);
    // goto the next sblock
    orig->blockbound =
      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
    return nextSBlock(orig);
  } else if(sbstart != 0) {
    GC_BAMBOO_DEBUGPRINT(0xef04);
    orig->blockbase = sbstart;
  }
  GC_BAMBOO_DEBUGPRINT(0xef05);
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xef06);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
  return true;
} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
inline void nextBlock(struct moveHelper * to) {
  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
  to->bound += BAMBOO_SMEM_SIZE;
  to->numblocks++;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
  to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->ptr = to->base + to->offset;
} // void nextBlock(struct moveHelper * to)
#ifdef GC_CACHE_ADAPT
inline void samplingDataConvert(int current_ptr) {
  unsigned int tmp_factor =
    current_ptr-gc_cache_revise_infomation.to_page_start_va;
  int topage=gc_cache_revise_infomation.to_page_index;
  int oldpage = gc_cache_revise_infomation.orig_page_index;
  unsigned int * newtable=&gccachesamplingtbl_r[topage];
  unsigned int * oldtable=&gccachesamplingtbl[oldpage];

  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
    (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
    newtable=(unsigned int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
    oldtable=(unsigned int*)(((char *)oldtable)+size_cachesamplingtbl_local);
  }
} // inline void samplingDataConvert(int)
inline void completePageConvert(struct moveHelper * orig,
                                struct moveHelper * to,
                                int current_ptr,
                                bool closeToPage) {
  INTPTR ptr = 0;
  int tocompare = 0;
  if(closeToPage) {
    ptr = to->ptr;
    tocompare = gc_cache_revise_infomation.to_page_end_va;
  } else {
    ptr = orig->ptr;
    tocompare = gc_cache_revise_infomation.orig_page_end_va;
  }
  if(ptr >= tocompare) {
    // end of an orig/to page
    // compute the impact of this page on the new page
    samplingDataConvert(current_ptr);
    // prepare for a new orig page
    int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*(tmp_index+1);
    gc_cache_revise_infomation.orig_page_index = tmp_index;
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
    gc_cache_revise_infomation.to_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
    gc_cache_revise_infomation.to_page_index =
      (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
  }
} // inline void completePageConvert(...)
#endif // GC_CACHE_ADAPT
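
/* Commentary (added): the cache-adapt sampling tables are kept per
 * page. When compaction finishes an orig/to page, samplingDataConvert()
 * folds the old page's per-core access counts into the destination
 * page's row, weighted by tmp_factor, the number of bytes already
 * emitted into the destination page; completePageConvert() then resets
 * the per-page bookkeeping for the next page. */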
// endaddr does not contain spaces for headers
inline bool moveobj(struct moveHelper * orig,
                    struct moveHelper * to,
                    int stopblock) {
  if(stopblock == 0) {
    return true;
  }

  GC_BAMBOO_DEBUGPRINT(0xe201);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(to->ptr);

  int type = 0;
  int size = 0;
  int mark = 0;
  int isize = 0;
innermoveobj:
  while((*((char*)(orig->ptr))) == (char)(-2)) {
    // skip the -2 padding
    orig->ptr = (void*)(orig->ptr) + 1;
  }
#ifdef GC_CACHE_ADAPT
  completePageConvert(orig, to, to->ptr, false);
#endif
  INTPTR origptr = orig->ptr;
  int origbound = orig->bound;
  int origblockbound = orig->blockbound;
  if((origptr >= origbound) || (origptr == origblockbound)) {
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  }
  GC_BAMBOO_DEBUGPRINT(0xe202);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT(((int *)(origptr))[0]);
  // check the obj's type, size and mark flag
  type = ((int *)(origptr))[0];
  size = 0;
  if(type == 0) {
    // end of this block, go to the next one
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  } else if(type < NUMCLASSES) {
    // a regular obj
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)(origptr);
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  }
  mark = ((int *)(origptr))[BAMBOOMARKBIT];
  bool isremote = ((((int *)(origptr))[BAMBOOMARKBIT] & REMOTEM) != 0);
  GC_BAMBOO_DEBUGPRINT(0xe203);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  ALIGNSIZE(size, &isize); // whether the obj is marked or not,
                           // the cursor should be able to step across it
  if((mark & MARKED) != 0) {
    int totop = to->top;
    int tobound = to->bound;
    GC_BAMBOO_DEBUGPRINT(0xe204);
#ifdef GC_PROFILE
    gc_num_liveobj++;
#endif
    // marked obj, copy it to the current heap top
    // check to see if the remaining space is enough
    if(totop + isize > tobound) {
      // fill 0 indicating the end of this block
      BAMBOO_MEMSET_WH(to->ptr, '\0', tobound - totop);
      // fill the header of this block and then go to the next block
      to->offset += tobound - totop;
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
#ifdef GC_CACHE_ADAPT
      int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
      nextBlock(to);
#ifdef GC_CACHE_ADAPT
      completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
      if(stopblock == to->numblocks) {
        // already fulfilled the block
        return true;
      } // if(stopblock == to->numblocks)
    } // if(to->top + isize > to->bound)
    // set the mark field to 2 (COMPACTED), indicating that this obj has
    // been moved and needs to be flushed
    ((int *)(origptr))[BAMBOOMARKBIT] = COMPACTED;
    INTPTR toptr = to->ptr;
    if(toptr != origptr) {
      if((int)(origptr) < (int)(toptr)+size) {
        memmove(toptr, origptr, size);
      } else {
        memcpy(toptr, origptr, size);
      }
      // fill the remaining space with -2
      BAMBOO_MEMSET_WH(toptr+size, -2, isize-size);
    }
    // store mapping info
    gcmappingtbl[OBJMAPPINGINDEX((int)origptr)] = (INTPTR)toptr;
    GC_BAMBOO_DEBUGPRINT(0xcdce);
    GC_BAMBOO_DEBUGPRINT_REG(origptr);
    GC_BAMBOO_DEBUGPRINT_REG(toptr);
    GC_BAMBOO_DEBUGPRINT_REG(isize);
    gccurr_heaptop -= isize;
    to->ptr += isize;
    to->offset += isize;
    to->top += isize;
#ifdef GC_CACHE_ADAPT
    int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
    if(to->top == to->bound) {
      // fill the header of this block and then go to the next block
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
      nextBlock(to);
    }
#ifdef GC_CACHE_ADAPT
    completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
  } // if((mark & MARKED) != 0)
  GC_BAMBOO_DEBUGPRINT(0xe205);

  // move to the next obj
  orig->ptr += isize;

  GC_BAMBOO_DEBUGPRINT_REG(isize);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
    GC_BAMBOO_DEBUGPRINT(0xe206);
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe207);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  return false;
} //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int stopblock)
// should be invoked with interrupts disabled
inline int assignSpareMem_I(int sourcecore,
                            int requiredmem,
                            int * tomove,
                            int * startaddr) {
  int b = 0;
  BLOCKINDEX(gcloads[sourcecore], &b);
  int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
              : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
  int remain = boundptr - gcloads[sourcecore];
  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
  *startaddr = gcloads[sourcecore];
  *tomove = gcfilledblocks[sourcecore] + 1;
  if(memneed < remain) {
    gcloads[sourcecore] += memneed;
    return 0;
  } else {
    // next available block
    gcfilledblocks[sourcecore] += 1;
    int newbase = 0;
    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
    gcloads[sourcecore] = newbase;
    return requiredmem-remain;
  }
} // int assignSpareMem_I(int, int, int*, int*)
// should be invoked with interrupts disabled
inline bool gcfindSpareMem_I(int * startaddr,
                             int * tomove,
                             int * dstcore,
                             int requiredmem,
                             int requiredcore) {
  for(int k = 0; k < NUMCORES4GC; k++) {
    if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
      // check if this stopped core has enough mem
      assignSpareMem_I(k, requiredmem, tomove, startaddr);
      *dstcore = k;
      return true;
    }
  }
  // if no spare mem can be found right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  gcmovepending++;
  return false;
} //bool gcfindSpareMem_I(int* startaddr,int* tomove,int* dstcore,int mem,int core)
1911 inline bool compacthelper(struct moveHelper * orig,
1912 struct moveHelper * to,
1915 bool * localcompact) {
1916 // scan over all objs in this block, compact the marked objs
1917 // loop stop when finishing either scanning all active objs or
1918 // fulfilled the gcstopblock
1919 GC_BAMBOO_DEBUGPRINT(0xe101);
1920 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
1921 GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1923 while(orig->ptr < gcmarkedptrbound) {
1924 bool stop = moveobj(orig, to, gcblock2fill);
1929 #ifdef GC_CACHE_ADAPT
1930 // end of an to page, wrap up its information
1931 samplingDataConvert(to->ptr);
1932 #endif // GC_CACHE_ADAPT
1933 // if no objs have been compact, do nothing,
1934 // otherwise, fill the header of this block
1935 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
1936 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1937 (*((int*)(to->base))) = to->offset;
1941 to->top -= BAMBOO_CACHE_LINE_SIZE;
1942 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
1944 *heaptopptr = to->ptr;
1945 *filledblocks = to->numblocks;
1947 GC_BAMBOO_DEBUGPRINT(0xe102);
1948 GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1950 GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1951 GC_BAMBOO_DEBUGPRINT_REG(*filledblocks);
1952 GC_BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
1954 // send msgs to core coordinator indicating that the compact is finishing
1955 // send compact finish message to core coordinator
1956 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1957 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
1958 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
1959 if(orig->ptr < gcmarkedptrbound) {
1960 GC_BAMBOO_DEBUGPRINT(0xe103);
1963 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1964 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
1965 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
1966 GC_BAMBOO_DEBUGPRINT(0xe104);
1969 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1970 GC_BAMBOO_DEBUGPRINT(0xe105);
1973 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1975 GC_BAMBOO_DEBUGPRINT(0xe106);
1976 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1981 if(orig->ptr < gcmarkedptrbound) {
1982 GC_BAMBOO_DEBUGPRINT(0xe107);
1985 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1986 *filledblocks, *heaptopptr, gccurr_heaptop, false);
1988 GC_BAMBOO_DEBUGPRINT(0xe108);
1989 GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1990 // finish compacting
1991 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1992 *filledblocks, *heaptopptr, 0, false);
1994 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
1996 if(orig->ptr < gcmarkedptrbound) {
1997 GC_BAMBOO_DEBUGPRINT(0xe109);
1998 // still have unpacked obj
2006 GC_BAMBOO_DEBUGPRINT(0xe10a);
2008 to->ptr = gcmovestartaddr;
2009 to->numblocks = gcblock2fill - 1;
2010 to->bound = (to->numblocks==0) ?
2011 BAMBOO_SMEM_SIZE_L :
2012 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2013 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2014 to->offset = to->ptr - to->base;
2015 to->top = (to->numblocks==0) ?
2016 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2018 to->offset = BAMBOO_CACHE_LINE_SIZE;
2019 to->ptr += to->offset; // for header
2020 to->top += to->offset;
2021 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2022 *localcompact = true;
2024 *localcompact = false;
2026 #ifdef GC_CACHE_ADAPT
2027 // initialize the gc_cache_revise_information
2028 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2029 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2030 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2031 gc_cache_revise_infomation.to_page_index =
2032 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2033 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2034 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2035 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2036 gc_cache_revise_infomation.orig_page_index =
2037 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2038 #endif // GC_CACHE_ADAPT
2041 GC_BAMBOO_DEBUGPRINT(0xe10b);
2043 } // void compacthelper()
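// compact() is the per-core entry for the compact phase: it allocates the
// two moveHelper cursors, points them at this core's first source and
// destination blocks via initOrig_Dst(), and hands off to compacthelper().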
2045 inline void compact() {
2046 if(COMPACTPHASE != gcphase) {
2047 BAMBOO_EXIT(0xb002);
2050 // initialize pointers for compacting
2051 struct moveHelper * orig =
2052 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2053 struct moveHelper * to =
2054 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2055 if(!initOrig_Dst(orig, to)) {
2056 // no available data to compact
2057 // send compact finish msg to STARTUP core
2058 GC_BAMBOO_DEBUGPRINT(0xe001);
2059 GC_BAMBOO_DEBUGPRINT_REG(to->base);
2060 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2061 0, to->base, 0, false);
2066 #ifdef GC_CACHE_ADAPT
2067 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2068 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2069 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2070 gc_cache_revise_infomation.orig_page_index =
2071 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2072 #endif // GC_CACHE_ADAPT
2074 int filledblocks = 0;
2075 INTPTR heaptopptr = 0;
2076 bool localcompact = true;
2077 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
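// The flush phase below rewrites every stored pointer through the
// old-to-new mapping recorded during compaction. A minimal sketch of the
// lookup, assuming OBJMAPPINGINDEX() maps a shared address to its slot:
//   void * newaddr = gcmappingtbl[OBJMAPPINGINDEX((int)oldaddr)];
// flushObj() below is exactly this lookup plus error checking.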
2082 // a NULL return value means either:
2083 // 1. objptr is NULL, or
2084 // 2. objptr is not a shared obj;
2085 // in these cases keeping the original value is OK
2086 inline void * flushObj(void * objptr) {
2087 GC_BAMBOO_DEBUGPRINT(0xe401);
2088 if(objptr == NULL) {
2091 void * dstptr = NULL;
2092 if(ISSHAREDOBJ(objptr)) {
2093 GC_BAMBOO_DEBUGPRINT(0xe402);
2094 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2095 // a shared obj ptr, change to new address
2096 dstptr = gcmappingtbl[OBJMAPPINGINDEX((int)objptr)];
2097 GC_BAMBOO_DEBUGPRINT_REG(dstptr);
2099 if(NULL == dstptr) {
2101 GC_BAMBOO_DEBUGPRINT(0xe403);
2102 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2103 GC_BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2104 // error: the obj lives on this core but has no mapping entry
2105 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2106 BAMBOO_EXIT(0xb003);
2107 } // if(NULL == dstptr)
2108 } // if(ISSHAREDOBJ(objptr))
2109 // if not a shared obj, return NULL to indicate no need to flush
2110 GC_BAMBOO_DEBUGPRINT(0xe404);
2112 } // void flushObj(void * objptr)
2114 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2116 // flush current stack
2117 while(stackptr!=NULL) {
2118 for(i=0; i<stackptr->size; i++) {
2119 if(stackptr->array[i] != NULL) {
2120 void * dst = flushObj(stackptr->array[i]);
2122 stackptr->array[i] = dst;
2126 stackptr=stackptr->next;
2129 // flush static pointers global_defs_p
2130 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2131 struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
2132 for(i=0; i<staticptr->size; i++) {
2133 if(staticptr->array[i] != NULL) {
2134 void * dst = flushObj(staticptr->array[i]);
2136 staticptr->array[i] = dst;
2144 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2145 for(i=0; i<NUMCLASSES; i++) {
2146 struct parameterwrapper ** queues =
2147 objectqueues[BAMBOO_NUM_OF_CORE][i];
2148 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2149 for(j = 0; j < length; ++j) {
2150 struct parameterwrapper * parameter = queues[j];
2151 struct ObjectHash * set=parameter->objectset;
2152 struct ObjectNode * ptr=set->listhead;
2154 void * dst = flushObj((void *)ptr->key);
2160 ObjectHashrehash(set);
2165 // flush current task descriptor
2166 if(currtpd != NULL) {
2167 for(i=0; i<currtpd->numParameters; i++) {
2168 void * dst = flushObj(currtpd->parameterArray[i]);
2170 currtpd->parameterArray[i] = dst;
2175 // flush active tasks
2176 if(activetasks != NULL) {
2177 struct genpointerlist * ptr=activetasks->list;
2179 struct taskparamdescriptor *tpd=ptr->src;
2181 for(i=0; i<tpd->numParameters; i++) {
2182 void * dst = flushObj(tpd->parameterArray[i]);
2184 tpd->parameterArray[i] = dst;
2189 genrehash(activetasks);
2192 // flush cached transferred obj
2193 struct QueueItem * tmpobjptr = getHead(&objqueue);
2194 while(tmpobjptr != NULL) {
2195 struct transObjInfo * objInfo =
2196 (struct transObjInfo *)(tmpobjptr->objectptr);
2197 void * dst = flushObj(objInfo->objptr);
2199 objInfo->objptr = dst;
2201 tmpobjptr = getNextQueueItem(tmpobjptr);
2204 // flush cached objs to be transferred
2205 struct QueueItem * item = getHead(totransobjqueue);
2206 while(item != NULL) {
2207 struct transObjInfo * totransobj =
2208 (struct transObjInfo *)(item->objectptr);
2209 void * dst = flushObj(totransobj->objptr);
2211 totransobj->objptr = dst;
2213 item = getNextQueueItem(item);
2214 } // while(item != NULL)
2216 // flush lock-related info
2217 for(i = 0; i < runtime_locklen; ++i) {
2218 void * dst = flushObj(runtime_locks[i].redirectlock);
2220 runtime_locks[i].redirectlock = (int)dst;
2222 if(runtime_locks[i].value != NULL) {
2223 void * dst=flushObj(runtime_locks[i].value);
2225 runtime_locks[i].value = (int)dst;
2232 // flush the bamboo_threadlocks
2233 if(bamboo_threadlocks != 0) {
2234 bamboo_threadlocks = (INTPTR)(flushObj((void *)bamboo_threadlocks));
2237 // flush global thread queue
2238 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2239 unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
2240 if(thread_counter > 0) {
2241 unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
2242 for(i = thread_counter; i > 0; i--) {
2243 bamboo_thread_queue[4+start] =
2244 (INTPTR)(flushObj((void *)bamboo_thread_queue[4+start]));
2245 start = (start+1)&bamboo_max_thread_num_mask;
2248 unlockthreadqueue();
2251 } // void flushRuntimeObj(struct garbagelist * stackptr)
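// flush() walks, in order: the runtime roots (via flushRuntimeObj above),
// the per-core mark queue (gc_dequeue_I), and the large-object queue
// (gc_lobjdequeue_I), rewriting the pointer fields of each object and
// resetting its mark word to INIT when done.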
2253 inline void flush(struct garbagelist * stackptr) {
2255 flushRuntimeObj(stackptr);
2258 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2259 bool hasItems = gc_moreItems_I();
2260 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2265 GC_BAMBOO_DEBUGPRINT(0xe301);
2266 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2267 void * ptr = gc_dequeue_I();
2268 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2269 if(ISSHAREDOBJ(ptr)) {
2270 // should be a local shared obj and should have mapping info
2271 ptr = flushObj(ptr);
2272 GC_BAMBOO_DEBUGPRINT(0xe302);
2273 GC_BAMBOO_DEBUGPRINT_REG(ptr);
2274 GC_BAMBOO_DEBUGPRINT_REG(tptr);
2275 GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2277 BAMBOO_EXIT(0xb004);
2279 } // if(ISSHAREDOBJ(ptr))
2280 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED)) {
2281 int type = ((int *)(ptr))[0];
2282 // scan all pointers in ptr
2283 unsigned INTPTR * pointer;
2284 pointer=pointerarray[type];
2285 GC_BAMBOO_DEBUGPRINT(0xe303);
2286 GC_BAMBOO_DEBUGPRINT_REG(pointer);
2288 /* Array of primitives */
2290 } else if (((INTPTR)pointer)==1) {
2291 GC_BAMBOO_DEBUGPRINT(0xe304);
2292 /* Array of pointers */
2293 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2294 int length=ao->___length___;
2296 for(j=0; j<length; j++) {
2297 GC_BAMBOO_DEBUGPRINT(0xe305);
2299 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2300 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2301 if(objptr != NULL) {
2302 void * dst = flushObj(objptr);
2304 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2309 GC_BAMBOO_DEBUGPRINT(0xe306);
2310 INTPTR size=pointer[0];
2312 for(i=1; i<=size; i++) {
2313 GC_BAMBOO_DEBUGPRINT(0xe307);
2314 unsigned int offset=pointer[i];
2315 void * objptr=*((void **)(((char *)ptr)+offset));
2316 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2317 if(objptr != NULL) {
2318 void * dst = flushObj(objptr);
2320 *((void **)(((char *)ptr)+offset)) = dst;
2323 } // for(i=1; i<=size; i++)
2324 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2325 // restore the mark field, indicating that this obj has been flushed
2326 if(ISSHAREDOBJ(ptr)) {
2327 ((int *)(ptr))[BAMBOOMARKBIT] = INIT;
2329 } //if((!ISSHAREDOBJ(ptr))||(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED))
2330 } // while(gc_moreItems())
2331 GC_BAMBOO_DEBUGPRINT(0xe308);
2333 // TODO bug here: the startup core holds all lobjs' info, so all the
2334 // lobjs are flushed sequentially on it.
2336 while(gc_lobjmoreItems_I()) {
2337 GC_BAMBOO_DEBUGPRINT(0xe309);
2338 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2339 ptr = flushObj(ptr);
2340 GC_BAMBOO_DEBUGPRINT(0xe30a);
2341 GC_BAMBOO_DEBUGPRINT_REG(ptr);
2342 GC_BAMBOO_DEBUGPRINT_REG(tptr);
2343 GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2345 BAMBOO_EXIT(0xb005);
2347 if(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED) {
2348 int type = ((int *)(ptr))[0];
2349 // scan all pointers in ptr
2350 unsigned INTPTR * pointer;
2351 pointer=pointerarray[type];
2352 GC_BAMBOO_DEBUGPRINT(0xe30b);
2353 GC_BAMBOO_DEBUGPRINT_REG(pointer);
2355 /* Array of primitives */
2357 } else if (((INTPTR)pointer)==1) {
2358 GC_BAMBOO_DEBUGPRINT(0xe30c);
2359 /* Array of pointers */
2360 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2361 int length=ao->___length___;
2363 for(j=0; j<length; j++) {
2364 GC_BAMBOO_DEBUGPRINT(0xe30d);
2366 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2367 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2368 if(objptr != NULL) {
2369 void * dst = flushObj(objptr);
2371 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2376 GC_BAMBOO_DEBUGPRINT(0xe30e);
2377 INTPTR size=pointer[0];
2379 for(i=1; i<=size; i++) {
2380 GC_BAMBOO_DEBUGPRINT(0xe30f);
2381 unsigned int offset=pointer[i];
2382 void * objptr=*((void **)(((char *)ptr)+offset));
2384 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2385 if(objptr != NULL) {
2386 void * dst = flushObj(objptr);
2388 *((void **)(((char *)ptr)+offset)) = dst;
2391 } // for(i=1; i<=size; i++)
2392 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2393 // restore the mark field, indicating that this obj has been flushed
2394 ((int *)(ptr))[BAMBOOMARKBIT] = INIT;
2395 } // if(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED)
2396 } // while(gc_lobjmoreItems())
2397 GC_BAMBOO_DEBUGPRINT(0xe310);
2399 // send flush finish message to core coordinator
2400 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2401 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2403 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2405 GC_BAMBOO_DEBUGPRINT(0xe311);
2408 #ifdef GC_CACHE_ADAPT
2409 // prepare for cache adaption:
2410 // -- flush the shared heap
2411 // -- clean dtlb entries
2412 // -- change cache strategy
2413 void cacheAdapt_gc(bool isgccachestage) {
2414 // flush the shared heap
2415 BAMBOO_CACHE_FLUSH_L2();
2417 // clean the dtlb entries
2418 BAMBOO_CLEAN_DTLB();
2420 // change the cache strategy
2421 gccachestage = isgccachestage;
2422 } // cacheAdapt_gc(bool isgccachestage)
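// All cacheAdapt_policy_*() variants below emit fixed-size records into
// gccachepolicytbl. Layout, inferred from the writers here and the reader
// in cacheAdapt_mutator():
//   gccachepolicytbl[0]      = number of records (numchanged)
//   gccachepolicytbl[2k+1]   = page index of record k
//   gccachepolicytbl[2k+2]   = bamboo_cache_policy_t word for that page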
2424 // the master core decides how to adapt the cache strategy for the mutator
2425 // according to the collected statistics
2427 // make all pages hash-for-home (h4h)
2428 int cacheAdapt_policy_h4h(){
2429 unsigned int page_index = 0;
2431 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2433 int * tmp_p = gccachepolicytbl+1;
2434 for(page_index = 0; page_index < page_num; page_index++) {
2435 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2436 bamboo_cache_policy_t policy = {0};
2437 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2438 *tmp_p = page_index;
2440 *tmp_p = policy.word;
2446 } // int cacheAdapt_policy_h4h()
2448 // make all pages local as non-cache-adaptable gc local mode
2449 int cacheAdapt_policy_local(){
2450 unsigned int page_index = 0;
2452 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2454 int * tmp_p = gccachepolicytbl+1;
2455 for(page_index = 0; page_index < page_num; page_index++) {
2456 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2457 bamboo_cache_policy_t policy = {0};
2459 BLOCKINDEX(page_sva, &block);
2460 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2461 // locally cache the page on its home core
2462 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2463 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2464 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2465 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2466 *tmp_p = page_index;
2468 *tmp_p = policy.word;
2474 } // int cacheAdapt_policy_local()
2476 int cacheAdapt_policy_hotest(){
2477 unsigned int page_index = 0;
2479 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2481 int * tmp_p = gccachepolicytbl+1;
2482 for(page_index = 0; page_index < page_num; page_index++) {
2483 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2484 bamboo_cache_policy_t policy = {0};
2486 unsigned int hotfreq = 0;
2488 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2489 for(int i = 0; i < NUMCORESACTIVE; i++) {
2490 unsigned int freq = *local_tbl;
2491 local_tbl=(unsigned int *)(
2492 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2494 // check the frequency and decide if this page is hot for this core
2495 if(hotfreq < freq) {
2501 // Decide the cache strategy for this page.
2502 // If a new cache strategy is adopted, write it into the shared
2503 // gccachepolicytbl; the memory reserved for this table is large
2504 // enough to hold the information.
2505 // Format: page index + cache policy (h4h / (host core [x,y]))
2507 // this page has not been accessed, do not change its cache policy
2510 // locally cache the page in the hottest core
2511 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2512 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2513 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2514 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2515 *tmp_p = page_index;
2517 *tmp_p = policy.word;
2524 } // int cacheAdapt_policy_hotest()
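// Worked example for the dominate policy below, with illustrative numbers:
// at GC_CACHE_ADAPT_DOMINATE_THRESHOLD = 50, a page with 1000 total
// accesses is cached locally on its hottest core only if that core
// accounts for at least 500 of them; otherwise the page is hash-for-home.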
2526 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
2527 // cache the page on the core that accesses it the most if that core accesses
2528 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total; otherwise,
2529 // hash-for-home the page
2530 int cacheAdapt_policy_dominate(){
2531 unsigned int page_index = 0;
2533 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2535 int * tmp_p = gccachepolicytbl+1;
2536 for(page_index = 0; page_index < page_num; page_index++) {
2537 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2538 bamboo_cache_policy_t policy = {0};
2540 unsigned long long totalfreq = 0;
2541 unsigned int hotfreq = 0;
2543 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2544 for(int i = 0; i < NUMCORESACTIVE; i++) {
2545 unsigned int freq = *local_tbl;
2546 local_tbl=(unsigned int *)(
2547 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2549 // check the frequency and decide if this page is hot for this core
2550 if(hotfreq < freq) {
2556 // Decide the cache strategy for this page.
2557 // If a new cache strategy is adopted, write it into the shared gccachepolicytbl.
2559 // Format: page index + cache policy
2561 // this page has not been accessed, do not change its cache policy
2565 (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
2566 hotfreq/=BAMBOO_PAGE_SIZE;
2567 if(hotfreq < totalfreq) {
2569 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2571 // locally cache the page in the hottest core
2572 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2573 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2574 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2575 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2577 *tmp_p = page_index;
2579 *tmp_p = policy.word;
2585 } // int cacheAdapt_policy_dominate()
2587 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
2589 void gc_quicksort(unsigned long long *array,
2595 int rightIdx = right;
2596 if((right-left+1) >= 1) {
2597 pivot = (left+right)/2;
2598 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
2599 int pivotValue = array[pivot*3-offset];
2600 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
2603 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
2606 // swap [leftIdx] & [rightIdx]
2607 for(int k = 0; k < 3; k++) {
2608 unsigned long long tmp = array[3*rightIdx-k];
2609 array[3*rightIdx-k] = array[3*leftIdx-k];
2610 array[3*leftIdx-k] = tmp;
2614 if((leftIdx-1) == pivot) {
2615 pivot = rightIdx = rightIdx + 1;
2616 } else if((leftIdx+1) == pivot) {
2617 pivot = leftIdx = leftIdx-1;
2620 gc_quicksort(array, left, pivot-1, offset);
2621 gc_quicksort(array, pivot+1, right, offset);
2624 } // void gc_quicksort(...)
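// Record layout of core2heavypages[c], inferred from the writers in the
// overload/crowd policies below (record k is 1-based):
//   [0]     = number of page records for core c
//   [3k-2]  = address of the page's slot in gccachepolicytbl
//   [3k-1]  = total accesses of the page (scaled by BAMBOO_PAGE_SIZE)
//   [3k]    = remote accesses of the page
// gc_quicksort(array, left, right, offset) above orders records by
// array[3k-offset] in descending order, so offset 0 sorts by remote
// accesses and offset 1 by total accesses.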
2626 // Every page is cached on the core that accesses it the most.
2627 // Check to see if any core's pages total more accesses than the workload
2628 // threshold (total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD). If so,
2629 // find the pages with the most remote accesses and hash-for-home them
2630 // until we get below the threshold
2631 int cacheAdapt_policy_overload(){
2632 unsigned int page_index = 0;
2634 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2636 int * tmp_p = gccachepolicytbl+1;
2637 unsigned long long workload[NUMCORESACTIVE];
2638 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2639 unsigned long long total_workload = 0;
2640 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2641 memset(core2heavypages,0,
2642 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2643 for(page_index = 0; page_index < page_num; page_index++) {
2644 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2645 bamboo_cache_policy_t policy = {0};
2647 unsigned long long totalfreq = 0;
2648 unsigned int hotfreq = 0;
2650 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2651 for(int i = 0; i < NUMCORESACTIVE; i++) {
2652 unsigned int freq = *local_tbl;
2653 local_tbl=(unsigned int *)(
2654 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2656 // check the frequency and decide if this page is hot for this core
2657 if(hotfreq < freq) {
2662 // Decide the cache strategy for this page.
2663 // If a new cache strategy is adopted, write it into the shared
2664 // gccachepolicytbl; the memory reserved for this table is large
2665 // enough to hold the information.
2666 // Format: page index + cache policy (h4h / (host core [x,y]))
2668 // this page has not been accessed, do not change its cache policy
2672 totalfreq/=BAMBOO_PAGE_SIZE;
2673 hotfreq/=BAMBOO_PAGE_SIZE;
2674 // locally cache the page in the hottest core
2675 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2676 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2677 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2678 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2679 *tmp_p = page_index;
2681 *tmp_p = policy.word;
2684 workload[hotestcore] += totalfreq;
2685 total_workload += totalfreq;
2686 // insert into core2heavypages using quicksort
2687 unsigned long long remoteaccess = totalfreq - hotfreq;
2688 int index = (int)core2heavypages[hotestcore][0];
2689 core2heavypages[hotestcore][3*index+3] = remoteaccess;
2690 core2heavypages[hotestcore][3*index+2] = totalfreq;
2691 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
2692 core2heavypages[hotestcore][0]++;
2695 unsigned long long workload_threshold =
2696 total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
2697 // Check the workload of each core
2698 for(int i = 0; i < NUMCORESACTIVE; i++) {
2700 int index = (int)core2heavypages[i][0];
2701 if(workload[i] > workload_threshold) {
2702 // sort according to the remoteaccess
2703 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
2704 while((workload[i] > workload_threshold) && (j<index*3)) {
2705 // hash-for-home the pages with the most remote accesses
2706 bamboo_cache_policy_t policy = {0};
2707 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2708 *((int*)core2heavypages[i][j]) = policy.word;
2709 workload[i] -= core2heavypages[i][j+1];
2716 } // int cacheAdapt_policy_overload()
2718 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
2719 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
2720 // Every page is cached on the core that accesses it the most.
2721 // Check to see if any core's pages total more accesses than the workload
2722 // threshold (total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD). If so,
2723 // find the pages with the most remote accesses and hash-for-home them
2724 // until we get below the threshold.
2725 // Sort pages based on activity....
2726 // If more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
2727 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
2728 // then start hashing these pages (selecting the ones with the most remote
2729 // accesses first or fewest local accesses) until we get below
2730 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
2731 int cacheAdapt_policy_crowd(){
2732 unsigned int page_index = 0;
2734 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2736 int * tmp_p = gccachepolicytbl+1;
2737 unsigned long long workload[NUMCORESACTIVE];
2738 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2739 unsigned long long total_workload = 0;
2740 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2741 memset(core2heavypages,0,
2742 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2743 for(page_index = 0; page_index < page_num; page_index++) {
2744 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2745 bamboo_cache_policy_t policy = {0};
2747 unsigned long long totalfreq = 0;
2748 unsigned int hotfreq = 0;
2750 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2751 for(int i = 0; i < NUMCORESACTIVE; i++) {
2752 unsigned int freq = *local_tbl;
2753 local_tbl=(unsigned int *)(
2754 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2756 // check the frequency and decide if this page is hot for this core
2757 if(hotfreq < freq) {
2762 // Decide the cache strategy for this page.
2763 // If a new cache strategy is adopted, write it into the shared
2764 // gccachepolicytbl; the memory reserved for this table is large
2765 // enough to hold the information.
2766 // Format: page index + cache policy (h4h / (host core [x,y]))
2768 // this page has not been accessed, do not change its cache policy
2771 totalfreq/=BAMBOO_PAGE_SIZE;
2772 hotfreq/=BAMBOO_PAGE_SIZE;
2773 // locally cache the page in the hottest core
2774 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2775 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2776 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2777 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2778 *tmp_p = page_index;
2780 *tmp_p = policy.word;
2783 workload[hotestcore] += totalfreq;
2784 total_workload += totalfreq;
2785 // insert into core2heavypages using quicksort
2786 unsigned long long remoteaccess = totalfreq - hotfreq;
2787 int index = (int)core2heavypages[hotestcore][0];
2788 core2heavypages[hotestcore][3*index+3] = remoteaccess;
2789 core2heavypages[hotestcore][3*index+2] = totalfreq;
2790 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
2791 core2heavypages[hotestcore][0]++;
2794 unsigned long long workload_threshold =
2795 total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
2796 // Check the workload of each core
2797 for(int i = 0; i < NUMCORESACTIVE; i++) {
2799 int index = (int)core2heavypages[i][0];
2800 if(workload[i] > workload_threshold) {
2801 // sort according to the remoteaccess
2802 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
2803 while((workload[i] > workload_threshold) && (j<index*3)) {
2804 // hash-for-home the pages with the most remote accesses
2805 bamboo_cache_policy_t policy = {0};
2806 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2807 *((int*)core2heavypages[i][j]) = policy.word;
2808 workload[i] -= core2heavypages[i][j+1];
2813 // Check if the accesses are crowded on few pages
2814 // sort according to the total access
2816 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
2817 unsigned long long threshold =
2818 GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
2819 int num_crowded = 0;
2820 unsigned long long t_workload = 0;
2822 t_workload += core2heavypages[i][j+num_crowded*3+1];
2824 } while(t_workload < threshold);
2825 // num_crowded is <= GC_CACHE_ADAPT_CROWD_THRESHOLD and, if there are
2826 // enough items, always == GC_CACHE_ADAPT_CROWD_THRESHOLD
2827 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
2828 // need to hash-for-home these pages:
2829 // sort the pages according to remote accesses
2830 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
2831 // h4h the pages with the most remote accesses
2832 bamboo_cache_policy_t policy = {0};
2833 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2834 *((int*)core2heavypages[i][j]) = policy.word;
2835 workload[i] -= core2heavypages[i][j+1];
2836 t_workload -= core2heavypages[i][j+1];
2838 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
2844 } // int cacheAdapt_policy_crowd()
2846 void cacheAdapt_master() {
2847 #ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
2848 gc_output_cache_sampling_r();
2849 #endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
2851 // check the collected statistics
2852 // for each page, decide the new cache strategy
2853 #ifdef GC_CACHE_ADAPT_POLICY1
2854 numchanged = cacheAdapt_policy_h4h();
2855 #elif defined GC_CACHE_ADAPT_POLICY2
2856 numchanged = cacheAdapt_policy_local();
2857 #elif defined GC_CACHE_ADAPT_POLICY3
2858 numchanged = cacheAdapt_policy_hotest();
2859 #elif defined GC_CACHE_ADAPT_POLICY4
2860 numchanged = cacheAdapt_policy_dominate();
2861 #elif defined GC_CACHE_ADAPT_POLICY5
2862 numchanged = cacheAdapt_policy_overload();
2863 #elif defined GC_CACHE_ADAPT_POLICY6
2864 numchanged = cacheAdapt_policy_crowd();
2866 *gccachepolicytbl = numchanged;
2869 // adapt the cache strategy for the mutator
2870 void cacheAdapt_mutator() {
2871 int numchanged = *gccachepolicytbl;
2872 // check the changes and adapt them
2873 int * tmp_p = gccachepolicytbl+1;
2874 while(numchanged--) {
2875 // read out the policy
2876 int page_index = *tmp_p;
2877 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
2879 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
2880 policy, BAMBOO_PAGE_SIZE);
2886 void gc_output_cache_sampling() {
2887 unsigned int page_index = 0;
2889 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2890 for(page_index = 0; page_index < page_num; page_index++) {
2891 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2893 BLOCKINDEX(page_sva, &block);
2894 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2895 tprintf("va: %x page_index: %d host: %d\n",
2896 (int)page_sva, page_index, coren);
2897 for(int i = 0; i < NUMCORESACTIVE; i++) {
2898 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl
2899 +size_cachesamplingtbl_local*i);
2900 unsigned int freq = local_tbl[page_index];
2901 printf("%8d ",freq);
2905 printf("=================\n");
2906 } // gc_output_cache_sampling
2908 void gc_output_cache_sampling_r() {
2909 unsigned int page_index = 0;
2911 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2912 for(page_index = 0; page_index < page_num; page_index++) {
2913 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2915 BLOCKINDEX(page_sva, &block);
2916 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2917 tprintf("va: %x page_index: %d host: %d\n",
2918 (int)page_sva, page_index, coren);
2919 for(int i = 0; i < NUMCORESACTIVE; i++) {
2920 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl_r
2921 +size_cachesamplingtbl_local_r*i);
2922 unsigned int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
2923 printf("%8d ",freq);
2927 printf("=================\n");
2928 } // gc_output_cache_sampling_r
2929 #endif // GC_CACHE_ADAPT
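// gc_collect() below is the slave-side collector loop on gc cores: for each
// phase it spins until gcphase advances, does the local work (initGC, mark,
// compact, flush, and, under GC_CACHE_ADAPT, the prefinish cache adaption),
// and acknowledges the master with the matching GCFINISH* message.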
2931 inline void gc_collect(struct garbagelist * stackptr) {
2932 // inform the master that this core is at a gc safe point and is ready
2933 // to do gc
2934 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
2935 self_numreceiveobjs, false);
2937 // core collector routine
2939 if(INITPHASE == gcphase) {
2943 #ifdef RAWPATH // TODO GC_DEBUG
2944 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
2947 #ifdef GC_CACHE_ADAPT
2948 // prepare for cache adaption:
2949 cacheAdapt_gc(true);
2950 #endif // GC_CACHE_ADAPT
2951 //send init finish msg to core coordinator
2952 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
2955 if(MARKPHASE == gcphase) {
2959 #ifdef RAWPATH // TODO GC_DEBUG
2960 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
2961 udn_tile_coord_y());
2963 mark(true, stackptr);
2964 #ifdef RAWPATH // TODO GC_DEBUG
2965 printf("(%x,%x) Finish mark phase, start compact phase\n",
2966 udn_tile_coord_x(), udn_tile_coord_y());
2969 #ifdef RAWPATH // TODO GC_DEBUG
2970 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
2971 udn_tile_coord_y());
2975 if(FLUSHPHASE == gcphase) {
2979 #ifdef RAWPATH // TODO GC_DEBUG
2980 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
2981 udn_tile_coord_y());
2984 // send the num of obj/liveobj/forwardobj to the startupcore
2985 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2986 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
2987 gc_num_liveobj, gc_num_forwardobj, false);
2990 #endif // GC_PROFILE
2992 #ifdef RAWPATH // TODO GC_DEBUG
2993 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
2994 udn_tile_coord_y());
2997 #ifdef GC_CACHE_ADAPT
2999 if(PREFINISHPHASE == gcphase) {
3003 #ifdef RAWPATH // TODO GC_DEBUG
3004 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3005 udn_tile_coord_y());
3007 // cache adapt phase
3008 cacheAdapt_mutator();
3009 cacheAdapt_gc(false);
3010 //send init finish msg to core coordinator
3011 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3012 #ifdef RAWPATH // TODO GC_DEBUG
3013 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3014 udn_tile_coord_y());
3016 #ifdef GC_CACHE_SAMPLING
3017 // reset the sampling arrays
3018 bamboo_dtlb_sampling_reset();
3019 #endif // GC_CACHE_SAMPLING
3020 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3021 // zero out the gccachesamplingtbl
3022 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3023 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3024 size_cachesamplingtbl_local_r);
3026 #endif // GC_CACHE_ADAPT
3028 // invalidate all shared mem pointers
3029 bamboo_cur_msp = NULL;
3030 bamboo_smem_size = 0;
3031 bamboo_smem_zero_top = NULL;
3034 if(FINISHPHASE == gcphase) {
3040 gcprocessing = false;
3041 #ifdef RAWPATH // TODO GC_DEBUG
3042 printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3044 } // void gc_collect(struct garbagelist * stackptr)
3046 inline void gc_nocollect(struct garbagelist * stackptr) {
3047 // inform the master that this core is at a gc safe point and is ready
3048 // to do gc
3049 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3050 self_numreceiveobjs, false);
3053 if(INITPHASE == gcphase) {
3057 #ifdef RAWPATH // TODO GC_DEBUG
3058 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3061 #ifdef GC_CACHE_ADAPT
3062 // prepare for cache adaption:
3063 cacheAdapt_gc(true);
3064 #endif // GC_CACHE_ADAPT
3065 //send init finish msg to core coordinator
3066 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3069 if(MARKPHASE == gcphase) {
3073 #ifdef RAWPATH // TODO GC_DEBUG
3074 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3075 udn_tile_coord_y());
3077 mark(true, stackptr);
3078 #ifdef RAWPATH // TODO GC_DEBUG
3079 printf("(%x,%x) Finish mark phase, wait for flush\n",
3080 udn_tile_coord_x(), udn_tile_coord_y());
3083 // non-gc core collector routine
3085 if(FLUSHPHASE == gcphase) {
3089 #ifdef RAWPATH // TODO GC_DEBUG
3090 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3091 udn_tile_coord_y());
3094 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3095 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3096 gc_num_liveobj, gc_num_forwardobj, false);
3099 #endif // GC_PROFILE
3101 #ifdef RAWPATH // TODO GC_DEBUG
3102 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3103 udn_tile_coord_y());
3106 #ifdef GC_CACHE_ADAPT
3108 if(PREFINISHPHASE == gcphase) {
3112 #ifdef RAWPATH // TODO GC_DEBUG
3113 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3114 udn_tile_coord_y());
3116 // cache adapt phase
3117 cacheAdapt_mutator();
3118 cacheAdapt_gc(false);
3119 //send init finish msg to core coordinator
3120 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3121 #ifdef RAWPATH // TODO GC_DEBUG
3122 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3123 udn_tile_coord_y());
3125 #ifdef GC_CACHE_SAMPLING
3126 // reset the sampling arrays
3127 bamboo_dtlb_sampling_reset();
3128 #endif // GC_CACHE_SAMPLING
3129 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3130 // zero out the gccachesamplingtbl
3131 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3132 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3133 size_cachesamplingtbl_local_r);
3135 #endif // GC_CACHE_ADAPT
3137 // invalidate all shared mem pointers
3138 bamboo_cur_msp = NULL;
3139 bamboo_smem_size = 0;
3140 bamboo_smem_zero_top = NULL;
3143 if(FINISHPHASE == gcphase) {
3148 gcprocessing = false;
3149 #ifdef RAWPATH // TODO GC_DEBUG
3150 printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3152 } // void gc_nocollect(struct garbagelist * stackptr)
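// gc_master() drives the collection from the startup core. Road map of the
// code below:
//   INITPHASE -> MARKPHASE -> loadbalance()/GCSTARTCOMPACT ->
//   COMPACTPHASE/SUBTLECOMPACTPHASE -> FLUSHPHASE
//   -> PREFINISHPHASE (GC_CACHE_ADAPT only) -> FINISHPHASE,
// broadcasting each transition and spinning on gccorestatus[] until all
// cores acknowledge the current phase.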
3154 inline void gc_master(struct garbagelist * stackptr) {
3156 gcphase = INITPHASE;
3158 waitconfirm = false;
3162 // Note: all cores need to init gc including non-gc cores
3163 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3164 // send GC init messages to all cores
3165 send_msg_1(i, GCSTARTINIT, false);
3167 bool isfirst = true;
3168 bool allStall = false;
3170 #ifdef GC_CACHE_ADAPT
3171 // prepare for cache adaption:
3172 cacheAdapt_gc(true);
3173 #endif // GC_CACHE_ADAPT
3175 #ifdef RAWPATH // TODO GC_DEBUG
3176 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3177 udn_tile_coord_y());
3180 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3182 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3183 if(gc_checkAllCoreStatus_I()) {
3184 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3187 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3192 #ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
3193 gc_output_cache_sampling();
3194 #endif // GC_CACHE_ADAPT
3195 #ifdef RAWPATH // TODO GC_DEBUG
3196 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3197 udn_tile_coord_y());
3199 // restore the gcstatus of all cores
3200 // Note: all cores have to do mark including non-gc cores
3201 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3202 for(i = 1; i < NUMCORESACTIVE; ++i) {
3203 gccorestatus[i] = 1;
3204 // send GC start messages to all cores
3205 send_msg_1(i, GCSTART, false);
3208 gcphase = MARKPHASE;
3210 while(MARKPHASE == gcphase) {
3211 mark(isfirst, stackptr);
3218 } // while(MARKPHASE == gcphase)
3219 // send msgs to all cores requesting their large-obj info
3220 // Note: only gc cores need to be asked; non-gc cores do not host any objs
3221 numconfirm = NUMCORES4GC - 1;
3222 for(i = 1; i < NUMCORES4GC; ++i) {
3223 send_msg_1(i, GCLOBJREQUEST, false);
3225 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3230 } // wait for responses
3231 // check the heaptop
3232 if(gcheaptop < gcmarkedptrbound) {
3233 gcheaptop = gcmarkedptrbound;
3238 #ifdef RAWPATH // TODO GC_DEBUG
3239 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3240 udn_tile_coord_y());
3242 // cache all large objs
3244 // not enough space to cache the large objs
3245 BAMBOO_EXIT(0xb006);
3247 // predict number of blocks to fill for each core
3249 int numpbc = loadbalance(&tmpheaptop);
3251 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3252 #ifdef RAWPATH // TODO GC_DEBUG
3253 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3254 udn_tile_coord_y());
3256 //int tmptopptr = 0;
3257 //BASEPTR(gctopcore, 0, &tmptopptr);
3259 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3260 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3261 GC_BAMBOO_DEBUGPRINT(0xabab);
3262 GC_BAMBOO_DEBUGPRINT_REG(tmptopptr);
3263 for(i = 0; i < NUMCORES4GC; ++i) {
3265 BASEPTR(i, numpbc, &tmpcoreptr);
3266 //send start compact messages to all cores
3267 //TODO bug here, do not know if the direction is positive or negative?
3268 if (tmpcoreptr < tmpheaptop) {
3269 gcstopblock[i] = numpbc + 1;
3270 if(i != STARTUPCORE) {
3271 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3273 gcblock2fill = numpbc+1;
3274 } // if(i != STARTUPCORE)
3276 gcstopblock[i] = numpbc;
3277 if(i != STARTUPCORE) {
3278 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3280 gcblock2fill = numpbc;
3281 } // if(i != STARTUPCORE)
3283 GC_BAMBOO_DEBUGPRINT(0xf000+i);
3284 GC_BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3285 GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3286 // init some data structures for the compact phase
3288 gcfilledblocks[i] = 0;
3289 gcrequiredmems[i] = 0;
3299 bool finalcompact = false;
3300 // initialize pointers for compacting
3301 struct moveHelper * orig =
3302 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3303 struct moveHelper * to =
3304 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3305 initOrig_Dst(orig, to);
3306 int filledblocks = 0;
3307 INTPTR heaptopptr = 0;
3308 bool finishcompact = false;
3309 bool iscontinue = true;
3310 bool localcompact = true;
3311 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3312 if((!finishcompact) && iscontinue) {
3313 GC_BAMBOO_DEBUGPRINT(0xe001);
3314 GC_BAMBOO_DEBUGPRINT_REG(numpbc);
3315 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3316 finishcompact = compacthelper(orig, to, &filledblocks,
3317 &heaptopptr, &localcompact);
3318 GC_BAMBOO_DEBUGPRINT(0xe002);
3319 GC_BAMBOO_DEBUGPRINT_REG(finishcompact);
3320 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3321 GC_BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3322 GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3323 GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3326 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3327 if(gc_checkCoreStatus_I()) {
3328 // all cores have finished compacting
3329 // restore the gcstatus of all cores
3330 for(i = 0; i < NUMCORES4GC; ++i) {
3331 gccorestatus[i] = 1;
3333 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3336 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3337 // check if there is spare mem for the pending move requests
3338 if(COMPACTPHASE == gcphase) {
3339 GC_BAMBOO_DEBUGPRINT(0xe003);
3340 resolvePendingMoveRequest();
3341 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3343 GC_BAMBOO_DEBUGPRINT(0xe004);
3346 } // if(gc_checkCoreStatus_I()) else ...
3349 GC_BAMBOO_DEBUGPRINT(0xe005);
3350 GC_BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3351 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3352 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3353 to->ptr = gcmovestartaddr;
3354 to->numblocks = gcblock2fill - 1;
3355 to->bound = (to->numblocks==0) ?
3356 BAMBOO_SMEM_SIZE_L :
3357 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3358 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3359 to->offset = to->ptr - to->base;
3360 to->top = (to->numblocks==0) ?
3361 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3363 to->offset = BAMBOO_CACHE_LINE_SIZE;
3364 to->ptr += to->offset; // for header
3365 to->top += to->offset;
3366 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3367 localcompact = true;
3369 localcompact = false;
3373 } else if(!finishcompact) {
3377 } // while(COMPACTPHASE == gcphase)
3381 #ifdef RAWPATH // TODO GC_DEBUG
3382 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3383 udn_tile_coord_y());
3387 #ifdef RAWPATH // TODO GC_DEBUG
3388 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3389 udn_tile_coord_y());
3395 gcphase = FLUSHPHASE;
3396 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3397 // Note: all cores should flush their runtime data, including non-gc
3398 // cores
3399 for(i = 1; i < NUMCORESACTIVE; ++i) {
3400 // send start flush messages to all cores
3401 gccorestatus[i] = 1;
3402 send_msg_1(i, GCSTARTFLUSH, false);
3407 #ifdef RAWPATH // TODO GC_DEBUG
3408 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3409 udn_tile_coord_y());
3414 #ifdef GC_CACHE_ADAPT
3415 // now the master core needs to decide the new cache strategy
3416 cacheAdapt_master();
3417 #endif // GC_CACHE_ADAPT
3419 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3420 while(FLUSHPHASE == gcphase) {
3421 // check the status of all cores
3422 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3423 if(gc_checkAllCoreStatus_I()) {
3424 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3427 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3428 } // while(FLUSHPHASE == gcphase)
3429 #ifdef RAWPATH // TODO GC_DEBUG
3430 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3431 udn_tile_coord_y());
3434 #ifdef GC_CACHE_ADAPT
3438 gcphase = PREFINISHPHASE;
3439 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3440 // Note: all cores should flush their runtime data, including non-gc
3441 // cores
3442 for(i = 1; i < NUMCORESACTIVE; ++i) {
3443 // send start flush messages to all cores
3444 gccorestatus[i] = 1;
3445 send_msg_1(i, GCSTARTPREF, false);
3447 #ifdef RAWPATH // TODO GC_DEBUG
3448 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3449 udn_tile_coord_y());
3451 // cache adapt phase
3452 cacheAdapt_mutator();
3453 #ifdef GC_CACHE_ADAPT_OUTPUT
3454 bamboo_output_cache_policy();
3456 cacheAdapt_gc(false);
3458 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3459 while(PREFINISHPHASE == gcphase) {
3460 // check the status of all cores
3461 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3462 if(gc_checkAllCoreStatus_I()) {
3463 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3466 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3467 } // while(PREFINISHPHASE == gcphase)
3469 #ifdef GC_CACHE_SAMPLING
3470 // reset the sampling arrays
3471 bamboo_dtlb_sampling_reset();
3472 #endif // GC_CACHE_SAMPLING
3473 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3474 // zero out the gccachesamplingtbl
3475 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3476 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3477 size_cachesamplingtbl_local_r);
3478 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
3480 #endif // GC_CACHE_ADAPT
3482 gcphase = FINISHPHASE;
3484 // invalidate all shared mem pointers
3485 // do it here because it takes time to inform all the other cores to
3486 // finish gc, and it might cause problems if some cores resume the
3487 // mutator earlier than others
3488 bamboo_cur_msp = NULL;
3489 bamboo_smem_size = 0;
3490 bamboo_smem_zero_top = NULL;
3495 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3496 for(i = 1; i < NUMCORESACTIVE; ++i) {
3497 // send gc finish messages to all cores
3498 send_msg_1(i, GCFINISH, false);
3499 gccorestatus[i] = 1;
3503 gcprocessing = false;
3504 #ifdef RAWPATH // TODO GC_DEBUG
3505 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
3506 udn_tile_coord_y());
3508 } // void gc_master(struct garbagelist * stackptr)
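// gc() is the common entry point and dispatches by core role: core 0 first
// verifies that every core has reached a gc safe point, then runs
// gc_master(); cores below NUMCORES4GC run gc_collect(); the remaining
// active cores run gc_nocollect(). Each branch first zeroes the word at
// bamboo_cur_msp (if it still owns one) so the shared heap stays untouched
// during gcinit.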
3510 inline bool gc(struct garbagelist * stackptr) {
3513 gcprocessing = false;
3517 #ifdef GC_CACHE_ADAPT
3518 #ifdef GC_CACHE_SAMPLING
3519 // disable the timer interrupt
3520 bamboo_mask_timer_intr();
3523 // core coordinator routine
3524 if(0 == BAMBOO_NUM_OF_CORE) {
3526 printf("(%x,%x) Check if can do gc or not\n", udn_tile_coord_x(),
3527 udn_tile_coord_y());
3529 bool isallstall = true;
3530 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3531 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3533 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
3534 if(gccorestatus[ti] != 0) {
3540 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3541 // some cores are still executing the mutator and have not reached a
3542 // gc safe point, so gc cannot start yet;
3543 // in case some pregc information msgs were lost, send a confirm
3544 // msg to the 'busy' core
3545 send_msg_1(ti, GCSTARTPRE, false);
3553 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
3554 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
3556 GC_BAMBOO_DEBUGPRINT(0xec04);
3557 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3558 sumsendobj += gcnumsendobjs[0][i];
3559 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
3560 } // for(i = 1; i < NUMCORESACTIVE; ++i)
3561 GC_BAMBOO_DEBUGPRINT(0xec05);
3562 GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
3563 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3564 sumsendobj -= gcnumreceiveobjs[0][i];
3565 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[i]);
3566 } // for(i = 1; i < NUMCORESACTIVE; ++i)
3567 GC_BAMBOO_DEBUGPRINT(0xec06);
3568 GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
3569 if(0 != sumsendobj) {
3570 // there are still msgs on the fly; wait until updated pregc
3571 // information arrives and check again
3573 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3581 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3584 #ifdef RAWPATH // TODO GC_DEBUG
3585 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3587 // Zero out the remaining bamboo_cur_msp
3588 // Only zero out the first 4 bytes of the remaining memory
3589 // Move the operation here because for the GC_CACHE_ADAPT version,
3590 // we need to make sure the shared heap is not touched during the gcinit
3591 // phase; otherwise there would be problems when adapting the cache strategy
3593 if((bamboo_cur_msp != 0)
3594 && (bamboo_smem_zero_top == bamboo_cur_msp)
3595 && (bamboo_smem_size > 0)) {
3596 *((int *)bamboo_cur_msp) = 0;
3598 #ifdef GC_FLUSH_DTLB
3599 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3600 BAMBOO_CLEAN_DTLB();
3601 gc_num_flush_dtlb++;
3604 #ifdef GC_CACHE_ADAPT
3605 #ifdef GC_CACHE_SAMPLING
3606 // get the sampling data
3607 bamboo_output_dtlb_sampling();
3608 #endif // GC_CACHE_SAMPLING
3609 #endif // GC_CACHE_ADAPT
3610 gcprocessing = true;
3611 gc_master(stackptr);
3612 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
3613 // Zero out the remaining bamboo_cur_msp
3614 // Only zero out the first 4 bytes of the remaining memory
3615 // Move the operation here because for the GC_CACHE_ADAPT version,
3616 // we need to make sure the shared heap is not touched during the gcinit
3617 // phase; otherwise there would be problems when adapting the cache strategy
3619 if((bamboo_cur_msp != 0)
3620 && (bamboo_smem_zero_top == bamboo_cur_msp)
3621 && (bamboo_smem_size > 0)) {
3622 *((int *)bamboo_cur_msp) = 0;
3624 #ifdef GC_FLUSH_DTLB
3625 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3626 BAMBOO_CLEAN_DTLB();
3627 gc_num_flush_dtlb++;
3630 #ifdef GC_CACHE_ADAPT
3631 #ifdef GC_CACHE_SAMPLING
3632 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3633 // get the sampling data
3634 bamboo_output_dtlb_sampling();
3636 #endif // GC_CACHE_SAMPLING
3637 #endif // GC_CACHE_ADAPT
3638 gcprocessing = true;
3639 gc_collect(stackptr);
3641 // Zero out the remaining bamboo_cur_msp
3642 // Only zero out the first 4 bytes of the remaining memory
3643 // Move the operation here because for the GC_CACHE_ADAPT version,
3644 // we need to make sure the shared heap is not touched during the gcinit
3645 // phase; otherwise there would be problems when adapting the cache strategy
3647 if((bamboo_cur_msp != 0)
3648 && (bamboo_smem_zero_top == bamboo_cur_msp)
3649 && (bamboo_smem_size > 0)) {
3650 *((int *)bamboo_cur_msp) = 0;
3652 #ifdef GC_FLUSH_DTLB
3653 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3654 BAMBOO_CLEAN_DTLB();
3655 gc_num_flush_dtlb++;
3658 #ifdef GC_CACHE_ADAPT
3659 #ifdef GC_CACHE_SAMPLING
3660 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3661 // get the sampling data
3662 bamboo_output_dtlb_sampling();
3664 #endif // GC_CACHE_SAMPLING
3665 #endif // GC_CACHE_ADAPT
3666 // not a gc core, should wait for gcfinish msg
3667 gcprocessing = true;
3668 gc_nocollect(stackptr);
3670 #ifdef GC_CACHE_ADAPT
3671 #ifdef GC_CACHE_SAMPLING
3672 // enable the timer interrupt
3673 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
3674 bamboo_unmask_timer_intr();
3675 #endif // GC_CACHE_SAMPLING
3676 #endif // GC_CACHE_ADAPT
3679 } // void gc(struct garbagelist * stackptr)
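// GC profiling (GC_PROFILE): gc_profileStart()/gc_profileItem() append
// timestamps to gc_infoArray[gc_infoIndex]->time[], and gc_profileEnd()
// appends a final timestamp plus seven statistics (livespace, freespace,
// lobj count, lobj space, obj count, live obj count, forwarded obj count).
// That is why gc_outputProfileData() treats time[index-8] as the end time
// and time[index-7 .. index-1] as the statistics.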
3682 inline void gc_profileStart(void) {
3683 if(!gc_infoOverflow) {
3684 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
3685 gc_infoArray[gc_infoIndex] = gcInfo;
3687 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
3691 inline void gc_profileItem(void) {
3692 if(!gc_infoOverflow) {
3693 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
3694 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
3698 inline void gc_profileEnd(void) {
3699 if(!gc_infoOverflow) {
3700 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
3701 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
3702 gcInfo->time[gcInfo->index++] = gc_num_livespace;
3703 gcInfo->time[gcInfo->index++] = gc_num_freespace;
3704 gcInfo->time[gcInfo->index++] = gc_num_lobj;
3705 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
3706 gcInfo->time[gcInfo->index++] = gc_num_obj;
3707 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
3708 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
3710 if(gc_infoIndex == GCINFOLENGTH) {
3711 gc_infoOverflow = true;
3712 //taskInfoIndex = 0;
3717 // output the profiling data
3718 void gc_outputProfileData() {
3721 unsigned long long totalgc = 0;
3723 #ifndef BAMBOO_MEMPROF
3724 BAMBOO_PRINT(0xdddd);
3726 // output task related info
3727 for(i= 0; i < gc_infoIndex; i++) {
3728 GCInfo * gcInfo = gc_infoArray[i];
3729 #ifdef BAMBOO_MEMPROF
3730 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
3732 unsigned long long tmp = 0;
3733 BAMBOO_PRINT(0xddda);
3734 for(j = 0; j < gcInfo->index - 7; j++) {
3735 BAMBOO_PRINT(gcInfo->time[j]);
3736 BAMBOO_PRINT(gcInfo->time[j]-tmp);
3737 BAMBOO_PRINT(0xdddb);
3738 tmp = gcInfo->time[j];
3740 tmp = (tmp-gcInfo->time[0]);
3741 BAMBOO_PRINT_REG(tmp);
3742 BAMBOO_PRINT(0xdddc);
3743 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
3744 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
3745 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
3746 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
3747 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
3748 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
3749 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
3750 BAMBOO_PRINT(0xddde);
3754 #ifndef BAMBOO_MEMPROF
3755 BAMBOO_PRINT(0xdddf);
3757 BAMBOO_PRINT_REG(totalgc);
3759 if(gc_infoOverflow) {
3760 BAMBOO_PRINT(0xefee);
3763 #ifndef BAMBOO_MEMPROF
3764 BAMBOO_PRINT(0xeeee);
3767 #endif // #ifdef GC_PROFILE