#include "multicoregarbage.h"
#include "multicoreruntime.h"
#include "runtime_arch.h"
#include "SimpleHash.h"
#include "GenericHashtable.h"
#include "ObjectHash.h"
#include "GCSharedHash.h"

extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
extern int numqueues[][NUMCLASSES];

extern struct genhashtable * activetasks;
extern struct taskparamdescriptor *currtpd;

extern struct LockValue runtime_locks[MAXTASKPARAMS];
extern int runtime_locklen;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
struct pointerblock {
  void * ptrs[NUMPTRS];
  struct pointerblock *next;
};
struct pointerblock *gchead=NULL;
int gcheadindex=0;
struct pointerblock *gctail=NULL;
int gctailindex=0;
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
struct pointerblock *gcspare=NULL;

#define NUMLOBJPTRS 20
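
// Large objects are queued separately from the regular mark queue: each
// entry records the object's start address, its length, and its host core,
// and the blocks form a doubly-linked list so the queue can be walked in
// both directions during sorting and compaction.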
struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];
  //void * dsts[NUMLOBJPTRS];
  int lengths[NUMLOBJPTRS];
  //void * origs[NUMLOBJPTRS];
  int hosts[NUMLOBJPTRS];
  struct lobjpointerblock *next;
  struct lobjpointerblock *prev;
};
struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;
#ifdef GC_CACHE_ADAPT
typedef struct gc_cache_revise_info {
  int orig_page_start_va;
  int orig_page_end_va;
  int orig_page_index;
  int to_page_start_va;
  int to_page_end_va;
  int to_page_index;
  int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;
gc_cache_revise_info_t gc_cache_revise_infomation;
#endif // GC_CACHE_ADAPT
// dump the whole shared mem in blocks
inline void dumpSMem() {
  int block = 0;
  int sblock = 0;
  int j = 0;
  int i = 0;
  int coren = 0;
  int x = 0;
  int y = 0;
  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  // reserved blocks for sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  // remaining shared mem
  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
    advanceblock = false;
    // compute the sblock #, block # and core coordinate (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
    coren = gc_block2core[block%(NUMCORES4GC*2)];
    // compute the core coordinate
    BAMBOO_COORDS(coren, &x, &y);
    printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           block, sblock++, x, y,
           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
} // void dumpSMem()
// should be invoked with interrupts disabled
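// (by convention in this file, the _I suffix marks routines that assume
// interrupts are already disabled by the caller)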
inline void gc_enqueue_I(void *ptr) {
  BAMBOO_DEBUGPRINT(0xe601);
  BAMBOO_DEBUGPRINT_REG(ptr);
  if (gcheadindex==NUMPTRS) {
    struct pointerblock * tmp;
    if (gcspare!=NULL) {
      // reuse the cached spare block instead of allocating a new one
      tmp=gcspare;
      gcspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
    } // if (gcspare!=NULL)
    gchead->next=tmp;
    gchead=tmp;
    gcheadindex=0;
  } // if (gcheadindex==NUMPTRS)
  gchead->ptrs[gcheadindex++]=ptr;
  BAMBOO_DEBUGPRINT(0xe602);
} // void gc_enqueue_I(void *ptr)
// dequeue and destroy the queue
inline void * gc_dequeue_I() {
  if (gctailindex==NUMPTRS) {
    struct pointerblock *tmp=gctail;
    gctail=gctail->next;
    gctailindex=0;
    if (gcspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gcspare=tmp;
    } // if (gcspare!=NULL)
  } // if (gctailindex==NUMPTRS)
  return gctail->ptrs[gctailindex++];
} // void * gc_dequeue_I()
// dequeue and do not destroy the queue
inline void * gc_dequeue2_I() {
  if (gctailindex2==NUMPTRS) {
    struct pointerblock *tmp=gctail2;
    gctail2=gctail2->next;
    gctailindex2=0;
  } // if (gctailindex2==NUMPTRS)
  return gctail2->ptrs[gctailindex2++];
} // void * gc_dequeue2_I()
inline int gc_moreItems_I() {
  if ((gchead==gctail)&&(gctailindex==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems_I()

inline int gc_moreItems2_I() {
  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems2_I()
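
// The mark queue keeps two read cursors: gctail/gctailindex consume the
// queue destructively (exhausted blocks are freed or recycled via gcspare),
// while gctail2/gctailindex2 only scan it, leaving the entries in place.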
// should be invoked with interrupts disabled
// enqueue a large obj: start addr & length
inline void gc_lobjenqueue_I(void *ptr,
                             int length,
                             int host) {
  BAMBOO_DEBUGPRINT(0xe901);
  if (gclobjheadindex==NUMLOBJPTRS) {
    struct lobjpointerblock * tmp;
    if (gclobjspare!=NULL) {
      tmp=gclobjspare;
      gclobjspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
    } // if (gclobjspare!=NULL)
    gclobjhead->next=tmp;
    tmp->prev = gclobjhead;
    gclobjhead=tmp;
    gclobjheadindex=0;
  } // if (gclobjheadindex==NUMLOBJPTRS)
  gclobjhead->lobjs[gclobjheadindex]=ptr;
  gclobjhead->lengths[gclobjheadindex]=length;
  gclobjhead->hosts[gclobjheadindex++]=host;
  BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
  BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
  BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
} // void gc_lobjenqueue_I(void *ptr...)
// dequeue and destroy the queue
inline void * gc_lobjdequeue_I(int * length,
                               int * host) {
  if (gclobjtailindex==NUMLOBJPTRS) {
    struct lobjpointerblock *tmp=gclobjtail;
    gclobjtail=gclobjtail->next;
    gclobjtailindex=0;
    gclobjtail->prev = NULL;
    if (gclobjspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gclobjspare=tmp;
      tmp->next = NULL;
      tmp->prev = NULL;
    } // if (gclobjspare!=NULL)
  } // if (gclobjtailindex==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail->lengths[gclobjtailindex];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
  }
  return gclobjtail->lobjs[gclobjtailindex++];
} // void * gc_lobjdequeue_I()
inline int gc_lobjmoreItems_I() {
  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems_I()
// dequeue and don't destroy the queue
inline void gc_lobjdequeue2_I() {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=1;
  } else {
    gclobjtailindex2++;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
} // void gc_lobjdequeue2_I()
inline int gc_lobjmoreItems2_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems2_I()
// 'reversely' dequeue and don't destroy the queue
inline void gc_lobjdequeue3_I() {
  if (gclobjtailindex2==0) {
    gclobjtail2=gclobjtail2->prev;
    gclobjtailindex2=NUMLOBJPTRS-1;
  } else {
    gclobjtailindex2--;
  } // if (gclobjtailindex2==0)
} // void gc_lobjdequeue3_I()
inline int gc_lobjmoreItems3_I() {
  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems3_I()
// reset the scan cursor back to the queue tail
inline void gc_lobjqueueinit4_I() {
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
} // void gc_lobjqueueinit4_I()
inline void * gc_lobjdequeue4_I(int * length,
                                int * host) {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=0;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail2->lengths[gclobjtailindex2];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
  }
  return gclobjtail2->lobjs[gclobjtailindex2++];
} // void * gc_lobjdequeue4_I()

inline int gc_lobjmoreItems4_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems4_I()
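
// To summarize the large-object cursor variants: dequeue/_moreItems consume
// the queue, dequeue2/dequeue3 step the scan cursor forward/backward without
// returning data, and queueinit4/dequeue4 reset the scan cursor to the tail
// and then replay the queue, returning each entry's data.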
INTPTR gccurr_heapbound = 0;

inline void gettype_size(void * ptr,
                         int * ttype,
                         int * tsize) {
  int type = ((int *)ptr)[0];
  int size = 0;
  if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)ptr;
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  } // if(type < NUMCLASSES)
  *ttype = type;
  *tsize = size;
} // void gettype_size(void * ptr, int * ttype, int * tsize)
inline bool isLarge(void * ptr,
                    int * ttype,
                    int * tsize) {
  BAMBOO_DEBUGPRINT(0xe701);
  BAMBOO_DEBUGPRINT_REG(ptr);
  // check if a pointer refers to a large object
  gettype_size(ptr, ttype, tsize);
  BAMBOO_DEBUGPRINT(*tsize);
  int bound = (BAMBOO_SMEM_SIZE);
  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  if((((int)ptr-gcbaseva)%(bound))==0) {
    // ptr is the start of a block
    BAMBOO_DEBUGPRINT(0xe702);
    BAMBOO_DEBUGPRINT(1);
    return true;
  }
  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
    // it crosses the boundary of the current block
    BAMBOO_DEBUGPRINT(0xe703);
    BAMBOO_DEBUGPRINT(1);
    return true;
  }
  BAMBOO_DEBUGPRINT(0);
  return false;
} // bool isLarge(void * ptr, int * ttype, int * tsize)
inline int hostcore(void * ptr) {
  // check the host core of ptr
  int host = 0;
  RESIDECORE(ptr, &host);
  BAMBOO_DEBUGPRINT(0xedd0);
  BAMBOO_DEBUGPRINT_REG(ptr);
  BAMBOO_DEBUGPRINT_REG(host);
  return host;
} // int hostcore(void * ptr)

inline bool isLocal(void * ptr) {
  // check if a pointer points into this core's share of the shared heap
  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
} // bool isLocal(void * ptr)
inline bool gc_checkCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORES4GC; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORES4GC; ++i)
  return allStall;
} // bool gc_checkCoreStatus_I()

inline bool gc_checkAllCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORESACTIVE; ++i)
  return allStall;
} // bool gc_checkAllCoreStatus_I()
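
// Mark-phase termination detection: the startup core decides the mark phase
// is done only when every core reports stalled in two consecutive snapshots
// and the global count of inter-core "marked object" messages sent equals
// the count received, with no change between the two snapshots
// (gcnumsrobjs_index flips between the two snapshot entries).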
inline void checkMarkStatue() {
  int i;
  BAMBOO_DEBUGPRINT(0xee01);
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    BAMBOO_DEBUGPRINT(0xee02);
    int entry_index = 0;
    if(waitconfirm) {
      // phase 2: use the entry that is not currently being checked
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    BAMBOO_DEBUGPRINT(0xee03);
    if(allStall) {
      BAMBOO_DEBUGPRINT(0xee04);
      if(!waitconfirm) {
        BAMBOO_DEBUGPRINT(0xee05);
        // the first time all cores are found stalled:
        // send out status confirm msgs to all other cores
        // and reset the corestatus array too
        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        for(i = 1; i < NUMCORESACTIVE; ++i) {
          gccorestatus[i] = 1;
          // send mark phase finish confirm request msg to core i
          send_msg_1(i, GCMARKCONFIRM, false);
        } // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
        // check if the sums of sent objs and received objs are the same;
        // yes -> check if the info is the latest; no -> keep executing
        int sumsendobj = 0;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        BAMBOO_DEBUGPRINT(0xee06);
        BAMBOO_DEBUGPRINT_REG(sumsendobj);
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        BAMBOO_DEBUGPRINT(0xee07);
        BAMBOO_DEBUGPRINT_REG(sumsendobj);
        if(0 == sumsendobj) {
          // check if the numsendobjs or numreceiveobjs changed on any core
          // between the two phases
          bool ischanged = false;
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
              ischanged = true;
              break;
            }
          } // for(i = 0; i < NUMCORESACTIVE; ++i)
          BAMBOO_DEBUGPRINT(0xee08);
          BAMBOO_DEBUGPRINT_REG(ischanged);
          if(!ischanged) {
            BAMBOO_DEBUGPRINT(0xee09);
            // all the core status info is the latest:
            // the mark phase is finished, move to the compact phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(i = 0; i < NUMCORESACTIVE; ++i) {
              gccorestatus[i] = 1;
            } // for(i = 0; i < NUMCORESACTIVE; ++i)
          } else {
            // flip the entries and check again later
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          }
        } else {
          // There were changes between phase 1 and phase 2, so we cannot
          // decide whether the mark phase has finished;
          // as it failed in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } // if(0 == sumsendobj) else ...
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!waitconfirm) else ...
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } // if(allStall) else ...
  } // if((!waitconfirm)...
  BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
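
// preGC makes sure no transfer-object messages are still in flight before a
// collection starts: it only succeeds once all cores are stalled and the
// global send and receive counters cancel out.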
inline bool preGC() {
  // preparation for gc:
  // make sure to clear all incoming msgs, especially transfer obj msgs
  BAMBOO_DEBUGPRINT(0xec01);
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    // send out status confirm msgs to all cores to check if there are
    // transfer obj msgs on-the-fly
    waitconfirm = true;
    numconfirm = NUMCORESACTIVE - 1;
    for(i = 1; i < NUMCORESACTIVE; ++i) {
      // send status confirm msg to core i
      send_msg_1(i, STATUSCONFIRM, false);
    } // for(i = 1; i < NUMCORESACTIVE; ++i)

    BAMBOO_DEBUGPRINT(0xec02);
    while(true) {
      if(numconfirm == 0) {
        break;
      }
    } // wait for confirmations
    BAMBOO_DEBUGPRINT(0xec03);
    numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
    numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
    BAMBOO_DEBUGPRINT(0xec04);
    int sumsendobj = 0;
    for(i = 0; i < NUMCORESACTIVE; ++i) {
      sumsendobj += numsendobjs[i];
      BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
    } // for(i = 0; i < NUMCORESACTIVE; ++i)
    BAMBOO_DEBUGPRINT(0xec05);
    BAMBOO_DEBUGPRINT_REG(sumsendobj);
    for(i = 0; i < NUMCORESACTIVE; ++i) {
      sumsendobj -= numreceiveobjs[i];
      BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
    } // for(i = 0; i < NUMCORESACTIVE; ++i)
    BAMBOO_DEBUGPRINT(0xec06);
    BAMBOO_DEBUGPRINT_REG(sumsendobj);
    if(0 == sumsendobj) {
      return true;
    } else {
      // still have some transfer obj msgs on-the-fly, cannot start gc
      return false;
    } // if(0 == sumsendobj)
  } else {
    BAMBOO_DEBUGPRINT(0xec07);
    // previously asked for status confirmation and do not have all the
    // confirmations yet, cannot start gc
    return false;
  } // if((!waitconfirm) || ...
} // bool preGC()
inline void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcloads[i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
      gcstopblock[i] = 0;
    } // for(i = 0; i < NUMCORES4GC; ++i)
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  //gcismapped = false;

  // initialize the mark queue
  if (gchead==NULL) {
    gcheadindex=gctailindex=gctailindex2 = 0;
    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if (gclobjhead==NULL) {
    gclobjheadindex = 0;
    gclobjtailindex = 0;
    gclobjtailindex2 = 0;
    gclobjhead=gclobjtail=gclobjtail2=
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

#ifdef LOCALHASHTBL_TEST
  freeRuntimeHash(gcpointertbl);
  gcpointertbl = allocateRuntimeHash(20);
#else
  mgchashreset(gcpointertbl);
#endif
  //gcpointertbl = allocateMGCHash(20);

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);

  // initialize the mapping info related structures
  if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
    // Never free the shared hash table, just reset it
    /*freeGCSharedHash(gcsharedptbl);
      gcsharedptbl = allocateGCSharedHash(20);*/
    mgcsharedhashReset(gcsharedptbl);
  }
  // Zero out the remaining bamboo_cur_msp
  // Only zero out the first 4 bytes of the remaining memory
  /*if((bamboo_cur_msp != 0)
     && (bamboo_smem_zero_top == bamboo_cur_msp)
     && (bamboo_smem_size > 0)) {
    *((int *)bamboo_cur_msp) = 0;
  }*/

#ifdef GC_PROFILE
  gc_num_livespace = 0;
  gc_num_freespace = 0;
  gc_num_lobjspace = 0;
  gc_num_forwardobj = 0;
  gc_num_profiles = NUMCORESACTIVE - 1;
#endif
} // void initGC()
// compute load balance for all cores
inline int loadbalance(int * heaptop) {
  // compute load balance
  int i;

  // get the total loads
  int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  BAMBOO_DEBUGPRINT(0xdddd);
  BAMBOO_DEBUGPRINT_REG(tloads);
  BAMBOO_DEBUGPRINT_REG(*heaptop);

  int b = 0;
  BLOCKINDEX(*heaptop, &b);
  int numbpc = b / NUMCORES4GC; // num of blocks per core
  BAMBOO_DEBUGPRINT_REG(b);
  BAMBOO_DEBUGPRINT_REG(numbpc);
  gctopblock = b;
  // find the host core of the heap top
  RESIDECORE(heaptop, &gctopcore);
  BAMBOO_DEBUGPRINT_REG(gctopcore);
  return numbpc;
} // int loadbalance(int * heaptop)
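
// Large objects are not compacted in place: cacheLObjs first parks them,
// sorted by start address, at the very top of the shared heap so that
// moveLObjs can later copy them back down without overlapping live data.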
inline bool cacheLObjs() {
  // check the total mem size needed for large objs
  unsigned long long sumsize = 0;
  int size = 0;
  BAMBOO_DEBUGPRINT(0xe801);
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  int tmp_lobj = 0;
  int tmp_len = 0;
  int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
    BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    BAMBOO_DEBUGPRINT_REG(tmp_len);
    BAMBOO_DEBUGPRINT_REG(sumsize);
    int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        } // if(tmp_block->lobjs[i-1] > tmp_lobj)
      } // if(i == 0) else ...
    }
    // insert it
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  } // while(gc_lobjmoreItems2_I())
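
  // at this point the queued large objects are sorted by ascending start
  // address (insertion sort above), which keeps the copy to the heap top
  // from overwriting entries that are still to be moved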
#ifdef GC_PROFILE
  gc_num_lobjspace = sumsize;
#endif
  // check if there is enough space to cache these large objs
  INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    BAMBOO_DEBUGPRINT(0xe802);
    BAMBOO_DEBUGPRINT_REG(dst);
    BAMBOO_DEBUGPRINT_REG(gcheaptop);
    BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  BAMBOO_DEBUGPRINT(0xe803);
  BAMBOO_DEBUGPRINT_REG(dst);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the large objs to the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
    dst -= size;
    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      //BAMBOO_WRITE_HINT_CACHE(dst, size);
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    BAMBOO_DEBUGPRINT(0x804);
    BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    BAMBOO_DEBUGPRINT(dst);
    BAMBOO_DEBUGPRINT_REG(size);
    BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  }
  return true;
} // bool cacheLObjs()
// update the bamboo_smemtbl to record current shared mem usage
void updateSmemTbl(int coren,
                   int localtop) {
  int ltopcore = 0;
  int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
    bound = BAMBOO_SMEM_SIZE;
  }
  int load = (localtop-gcbaseva)%bound;
  int i = 0;
  int j = 0;
  int toset = 0;
  do {
    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
    if(toset < ltopcore) {
      bamboo_smemtbl[toset]=
        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
    } else if(toset == ltopcore) {
      bamboo_smemtbl[toset] = load;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
      break;
    } else {
      break;
    }
    i++;
    if(i == 2) {
      i = 0;
      j++;
    }
  } while(true);
} // void updateSmemTbl(int, int)
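
// After compaction, moveLObjs copies the cached large objects from the top
// of the shared heap down to the first free position, rebuilding block
// headers and the gcsbstarttbl/bamboo_smemtbl entries as it goes, and sends
// the new address of each object back to its host core.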
inline void moveLObjs() {
  BAMBOO_DEBUGPRINT(0xea01);
#ifdef SMEMM
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
#endif
  // zero out the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // find current heap top:
  // flush all gcloads to indicate the real heap top on each core
  // (previously each one represented the next available ptr on its core)
  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1 ?
                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  BAMBOO_DEBUGPRINT(0xea02);
  BAMBOO_DEBUGPRINT_REG(gcloads[0]);
  BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    BAMBOO_DEBUGPRINT(0xf000+i);
    BAMBOO_DEBUGPRINT_REG(gcloads[i]);
    BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
    if((gcfilledblocks[i] > 0)
       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
    BAMBOO_DEBUGPRINT_REG(gcloads[i]);
  } // for(int i = 1; i < NUMCORES4GC; i++)

  // find current heap top
  // TODO: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory efficient
  int tmpheaptop = 0;
  int size = 0;
  int bound = 0;
  int i = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
                 (BAMBOO_SMEM_SIZE_L*i) :
                 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }

  // move large objs from gcheaptop to tmpheaptop,
  // writing the headers first
  unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
#ifdef SMEMM
  gcmem_mixed_usedmem += tomove;
#endif
  BAMBOO_DEBUGPRINT(0xea03);
  BAMBOO_DEBUGPRINT_REG(tomove);
  BAMBOO_DEBUGPRINT_REG(tmpheaptop);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);
  // flush the sbstartbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks the cached objs cross
    int remain = tmpheaptop-gcbaseva;
    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; // number of the sblock
    int b = 0;  // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // check the remaining space in this block
    bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;
    BAMBOO_DEBUGPRINT(0xea04);
    BAMBOO_DEBUGPRINT_REG(remain);
    BAMBOO_DEBUGPRINT_REG(tmpheaptop);
    int ptr = 0;
    int host = 0;
    int isize = 0;
    // flush the header of the first block
    int base = tmpheaptop;
    int cpysize = 0;
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain < isize) {
        // this object crosses block boundaries
        if(cpysize > 0) {
          // close the current block, fill its header
          BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
          *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
          bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; // add the header's size
          cpysize = 0;
          base = tmpheaptop;
          if(remain == 0) {
            remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                     BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          }
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
          BLOCKINDEX(tmpheaptop, &b);
          sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
        } // if(cpysize > 0)

        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        BAMBOO_DEBUGPRINT(0xea05);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(size);
        BAMBOO_DEBUGPRINT_REG(isize);
        BAMBOO_DEBUGPRINT_REG(base);
        gcheaptop += size;
        // cache the mapping info anyway
        //if(ptr != tmpheaptop) {
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        BAMBOO_DEBUGPRINT(0xcdca);
        BAMBOO_DEBUGPRINT_REG(ptr);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        if(host != BAMBOO_NUM_OF_CORE) {
          // inform the original host core of the new mapping
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          BAMBOO_DEBUGPRINT(0xcdcb);
          BAMBOO_DEBUGPRINT_REG(ptr);
          BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;

        // set the gcsbstarttbl and bamboo_smemtbl
        int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          gcsbstarttbl[sb] = (INTPTR)(-1);
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...

        // close the block for this lobj and fill its header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      } else {
        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        BAMBOO_DEBUGPRINT(0xea06);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(size);
        BAMBOO_DEBUGPRINT_REG(isize);
        gcheaptop += size;
        cpysize += isize;
        // cache the mapping info anyway
        //if(ptr != tmpheaptop) {
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        BAMBOO_DEBUGPRINT(0xcdcc);
        BAMBOO_DEBUGPRINT_REG(ptr);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
        if(host != BAMBOO_NUM_OF_CORE) {
          // inform the original host core of the new mapping
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          BAMBOO_DEBUGPRINT(0xcdcd);
          BAMBOO_DEBUGPRINT_REG(ptr);
          BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;
        remain -= isize;
        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } // if(remain < isize) else ...
    } // while(gc_lobjmoreItems4_I())
    if(cpysize > 0) {
      // close the last block, fill its header
      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
    } else {
      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
    }
    gcheaptop = tmpheaptop;
  } // if(tomove == 0)
  BAMBOO_DEBUGPRINT(0xea07);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);

  bamboo_free_block = 0;
  int tbound = 0;
  do {
    tbound = (bamboo_free_block<NUMCORES4GC) ?
             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  } while(true);

#ifdef GC_PROFILE
  // check how much live space there is
  gc_num_livespace = 0;
  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
    gc_num_livespace += bamboo_smemtbl[tmpi];
  }
  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
#endif
  BAMBOO_DEBUGPRINT(0xea08);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);
} // void moveLObjs()
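
// markObj implements the distributed mark step: an object hosted on this
// core is flagged DISCOVERED and put on the local mark queue, while a
// remote object triggers a single GCMARKEDOBJ message to its host core
// (gcforwardobjtbl remembers which objects were already forwarded).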
inline void markObj(void * objptr) {
  if(objptr == NULL) {
    return;
  }
  if(ISSHAREDOBJ(objptr)) {
    int host = hostcore(objptr);
    if(BAMBOO_NUM_OF_CORE == host) {
      // on this core
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(((int *)objptr)[6] == INIT) {
        // this is the first time this object is discovered,
        // set the flag as DISCOVERED
        ((int *)objptr)[6] |= DISCOVERED;
        BAMBOO_CACHE_FLUSH_LINE(objptr);
        gc_enqueue_I(objptr);
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      BAMBOO_DEBUGPRINT(0xbbbb);
      BAMBOO_DEBUGPRINT_REG(host);
      BAMBOO_DEBUGPRINT_REG(objptr);
      // check if this obj has been forwarded already
      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
        // send a msg to host informing that objptr is active
        send_msg_2(host, GCMARKEDOBJ, objptr, /*BAMBOO_NUM_OF_CORE,*/ false);
#ifdef GC_PROFILE
        gc_num_forwardobj++;
#endif // GC_PROFILE
        gcself_numsendobjs++;
        MGCHashadd(gcforwardobjtbl, (int)objptr);
      }
    }
  } else {
    // a local obj
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gc_enqueue_I(objptr);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // if(ISSHAREDOBJ(objptr))
} // void markObj(void * objptr)
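
// The root set enumerated below covers the thread stack, the per-core
// object queues, the current and pending task descriptors, objects cached
// in the transfer queues, and the runtime lock table.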
// enqueue root objs
inline void tomark(struct garbagelist * stackptr) {
  if(MARKPHASE != gcphase) {
    BAMBOO_DEBUGPRINT_REG(gcphase);
    BAMBOO_EXIT(0xb101);
  }
  gcbusystatus = true;

  int i,j;
  // enqueue current stack
  while(stackptr!=NULL) {
    BAMBOO_DEBUGPRINT(0xe501);
    BAMBOO_DEBUGPRINT_REG(stackptr->size);
    BAMBOO_DEBUGPRINT_REG(stackptr->next);
    BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        markObj(stackptr->array[i]);
      }
    }
    stackptr=stackptr->next;
  }

  BAMBOO_DEBUGPRINT(0xe503);
  // enqueue objectsets
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          markObj((void *)ptr->key);
          ptr=ptr->lnext;
        }
      }
    }
  }

  // enqueue current task descriptor
  if(currtpd != NULL) {
    BAMBOO_DEBUGPRINT(0xe504);
    for(i=0; i<currtpd->numParameters; i++) {
      markObj(currtpd->parameterArray[i]);
    }
  }

  BAMBOO_DEBUGPRINT(0xe505);
  // enqueue active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      for(i=0; i<tpd->numParameters; i++) {
        markObj(tpd->parameterArray[i]);
      }
      ptr=ptr->inext;
    }
  }

  BAMBOO_DEBUGPRINT(0xe506);
  // enqueue cached transferred objs
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    markObj(objInfo->objptr);
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  BAMBOO_DEBUGPRINT(0xe507);
  // enqueue cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    markObj(totransobj->objptr);
    item = getNextQueueItem(item);
  } // while(item != NULL)

  BAMBOO_DEBUGPRINT(0xe508);
  // enqueue lock related info
  for(i = 0; i < runtime_locklen; ++i) {
    markObj((void *)(runtime_locks[i].redirectlock));
    if(runtime_locks[i].value != NULL) {
      markObj((void *)(runtime_locks[i].value));
    }
  }
} // void tomark(struct garbagelist * stackptr)
inline void mark(bool isfirst,
                 struct garbagelist * stackptr) {
  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
  if(isfirst) {
    if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
    // enqueue root objs
    tomark(stackptr);
    gccurr_heaptop = 0; // record the size of all active objs in this core,
                        // aligned but not considering block boundaries
    gcmarkedptrbound = 0;
  }
  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);

  bool checkfield = true;
  bool sendStall = false;
  // mark phase
  while(MARKPHASE == gcphase) {
    if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      bool hasItems = gc_moreItems2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      BAMBOO_DEBUGPRINT(0xed05);
      if(!hasItems) {
        break;
      }
      sendStall = false;
      gcbusystatus = true;
      checkfield = true;
      void * ptr = gc_dequeue2_I();

      BAMBOO_DEBUGPRINT_REG(ptr);
      int size = 0;
      int isize = 0;
      int type = 0;
      // check if it is a shared obj
      if(ISSHAREDOBJ(ptr)) {
        // a shared obj, check if it is a local obj on this core
        int host = hostcore(ptr);
        bool islocal = (host == BAMBOO_NUM_OF_CORE);
        if(islocal) {
          bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
          if(isLarge(ptr, &type, &size) && isnotmarked) {
            // ptr is a large object and not marked or enqueued
            BAMBOO_DEBUGPRINT(0xecec);
            BAMBOO_DEBUGPRINT_REG(ptr);
            BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
            BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
            gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
            BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
            // mark this obj
            ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);
          } else if(isnotmarked) {
            // ptr is an unmarked active object on this core
            ALIGNSIZE(size, &isize);
            gccurr_heaptop += isize;
            BAMBOO_DEBUGPRINT(0xaaaa);
            BAMBOO_DEBUGPRINT_REG(ptr);
            BAMBOO_DEBUGPRINT_REG(isize);
            BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
            // mark this obj
            ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);
            if(ptr + size > gcmarkedptrbound) {
              gcmarkedptrbound = ptr + size;
            } // if(ptr + size > gcmarkedptrbound)
          } else {
            // ptr is not an active obj or has been marked
            checkfield = false;
          } // if(isLarge(ptr, &type, &size)) else ...
        } /* can never reach here
        else {
          if(BAMBOO_NUM_OF_CORE == 0) {
            BAMBOO_DEBUGPRINT(0xbbbb);
            BAMBOO_DEBUGPRINT_REG(host);
            BAMBOO_DEBUGPRINT_REG(ptr);
          }
          // check if this obj has been forwarded
          if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
            // send a msg to host informing that ptr is active
            send_msg_2(host, GCMARKEDOBJ, ptr, false);
            gcself_numsendobjs++;
            MGCHashadd(gcforwardobjtbl, (int)ptr);
          }
          checkfield = false;
        }// if(isLocal(ptr)) else ...*/
      } // if(ISSHAREDOBJ(ptr))
      BAMBOO_DEBUGPRINT(0xed06);

      if(checkfield) {
        // scan all pointers in ptr
        unsigned INTPTR * pointer;
        pointer=pointerarray[type];
        if (pointer==0) {
          /* Array of primitives */
        } else if (((INTPTR)pointer)==1) {
          /* Array of pointers */
          struct ArrayObject *ao=(struct ArrayObject *) ptr;
          int length=ao->___length___;
          int j;
          for(j=0; j<length; j++) {
            void * objptr =
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
            markObj(objptr);
          }
        } else {
          INTPTR size=pointer[0];
          int i;
          for(i=1; i<=size; i++) {
            unsigned int offset=pointer[i];
            void * objptr=*((void **)(((char *)ptr)+offset));
            markObj(objptr);
          }
        } // if (pointer==0) else if ... else ...
      } // if(checkfield)
    } // while(gc_moreItems2())
    BAMBOO_DEBUGPRINT(0xed07);
    gcbusystatus = false;
    // send mark finish msg to core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
      BAMBOO_DEBUGPRINT(0xed08);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
      gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
      gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
        gcself_numreceiveobjs;
      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
    } else {
      if(!sendStall) {
        BAMBOO_DEBUGPRINT(0xed09);
        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
                   gcself_numsendobjs, gcself_numreceiveobjs, false);
        sendStall = true;
      }
    } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
    BAMBOO_DEBUGPRINT(0xed0a);
    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
      BAMBOO_DEBUGPRINT(0xed0b);
      checkMarkStatue();
    }
  } // while(MARKPHASE == gcphase)
} // void mark(bool isfirst, struct garbagelist * stackptr)
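
// Once individual cores run out of room, the startup core serializes the
// remaining moves: compact2Heaptop walks the cores that still have pending
// memory requests and hands each one a GCMOVESTART assignment at the current
// global heap top.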
inline void compact2Heaptophelper_I(int coren,
                                    int* p,
                                    int* numblocks,
                                    int* remain) {
  int b;
  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
  if(STARTUPCORE == coren) {
    gctomove = true;
    gcmovestartaddr = *p;
    gcdstcore = gctopcore;
    gcblock2fill = *numblocks + 1;
  } else {
    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
  }
  BAMBOO_DEBUGPRINT_REG(coren);
  BAMBOO_DEBUGPRINT_REG(gctopcore);
  BAMBOO_DEBUGPRINT_REG(*p);
  BAMBOO_DEBUGPRINT_REG(*numblocks+1);
  if(memneed < *remain) {
    BAMBOO_DEBUGPRINT(0xd104);
    *p = *p + memneed;
    gcrequiredmems[coren] = 0;
    gcloads[gctopcore] += memneed;
    *remain = *remain - memneed;
  } else {
    BAMBOO_DEBUGPRINT(0xd105);
    // next available block
    *p = *p + *remain;
    gcfilledblocks[gctopcore] += 1;
    int newbase = 0;
    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
    gcloads[gctopcore] = newbase;
    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
    gcstopblock[gctopcore]++;
    gctopcore = NEXTTOPCORE(gctopblock);
    gctopblock++;
    *numblocks = gcstopblock[gctopcore];
    *p = gcloads[gctopcore];
    BLOCKINDEX(*p, &b);
    *remain=(b<NUMCORES4GC) ?
            ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
            : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
    BAMBOO_DEBUGPRINT(0xd106);
    BAMBOO_DEBUGPRINT_REG(gctopcore);
    BAMBOO_DEBUGPRINT_REG(*p);
    BAMBOO_DEBUGPRINT_REG(b);
    BAMBOO_DEBUGPRINT_REG(*remain);
  } // if(memneed < *remain)
  gcmovepending--;
} // void compact2Heaptophelper_I(int, int*, int*, int*)
inline void compact2Heaptop() {
  // no cores have spare mem and some cores are blocked with pending moves;
  // find the current heap top and let the blocked cores move toward it
  int p;
  int numblocks = gcfilledblocks[gctopcore];
  //BASEPTR(gctopcore, numblocks, &p);
  p = gcloads[gctopcore];
  int b;
  BLOCKINDEX(p, &b);
  int remain = (b<NUMCORES4GC) ?
               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
               : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
  // check if the top core has finished
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  if(gccorestatus[gctopcore] != 0) {
    BAMBOO_DEBUGPRINT(0xd101);
    BAMBOO_DEBUGPRINT_REG(gctopcore);
    // let the top core finish its own work first
    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    return;
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

  BAMBOO_DEBUGPRINT(0xd102);
  BAMBOO_DEBUGPRINT_REG(gctopcore);
  BAMBOO_DEBUGPRINT_REG(p);
  BAMBOO_DEBUGPRINT_REG(b);
  BAMBOO_DEBUGPRINT_REG(remain);
  for(int i = 0; i < NUMCORES4GC; i++) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
      BAMBOO_DEBUGPRINT(0xd103);
      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
      if(gccorestatus[gctopcore] != 0) {
        BAMBOO_DEBUGPRINT(0xd101);
        BAMBOO_DEBUGPRINT_REG(gctopcore);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        // the top core is not free now
        return;
      }
    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // for(i = 0; i < NUMCORES4GC; i++)
  BAMBOO_DEBUGPRINT(0xd106);
} // void compact2Heaptop()
inline void resolvePendingMoveRequest() {
  BAMBOO_DEBUGPRINT(0xeb01);
  BAMBOO_DEBUGPRINT(0xeeee);
  for(int k = 0; k < NUMCORES4GC; k++) {
    BAMBOO_DEBUGPRINT(0xf000+k);
    BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
    BAMBOO_DEBUGPRINT_REG(gcloads[k]);
    BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
    BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
  }
  BAMBOO_DEBUGPRINT(0xffff);

  int i;
  int j;
  bool nosparemem = true;
  bool haspending = false;
  bool hasrunning = false;
  bool noblock = false;
  int dstcore = 0;       // the core that needs spare mem
  int sourcecore = 0;    // the core that has spare mem
  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
    if(nosparemem) {
      // check if there are cores with spare mem
      if(gccorestatus[i] == 0) {
        // finished working, check if it still has spare mem
        if(gcfilledblocks[i] < gcstopblock[i]) {
          // still has spare mem
          nosparemem = false;
          sourcecore = i;
        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
      }
      i++;
    } // if(nosparemem)
    if(!haspending) {
      if(gccorestatus[j] != 0) {
        // not finished, check if it has pending move requests
        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
          dstcore = j;
          haspending = true;
        } else {
          hasrunning = true;
        } // if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0))
      } // if(gccorestatus[j] != 0) else ...
      j++;
    } // if(!haspending)
    if(!nosparemem && haspending) {
      // found a match; start moving data
      int tomove = 0;
      int startaddr = 0;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
                                                 gcrequiredmems[dstcore],
                                                 &tomove,
                                                 &startaddr);
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      BAMBOO_DEBUGPRINT(0xeb02);
      BAMBOO_DEBUGPRINT_REG(sourcecore);
      BAMBOO_DEBUGPRINT_REG(dstcore);
      BAMBOO_DEBUGPRINT_REG(startaddr);
      BAMBOO_DEBUGPRINT_REG(tomove);
      if(STARTUPCORE == dstcore) {
        BAMBOO_DEBUGPRINT(0xeb03);
        gcdstcore = sourcecore;
        gctomove = true;
        gcmovestartaddr = startaddr;
        gcblock2fill = tomove;
      } else {
        BAMBOO_DEBUGPRINT(0xeb04);
        send_msg_4(dstcore, GCMOVESTART, sourcecore,
                   startaddr, tomove, false);
      }
      gcmovepending--;
      nosparemem = true;
      haspending = false;
      noblock = true;
    }
  } // for(i = j = 0; ...)
  BAMBOO_DEBUGPRINT(0xcccc);
  BAMBOO_DEBUGPRINT_REG(hasrunning);
  BAMBOO_DEBUGPRINT_REG(haspending);
  BAMBOO_DEBUGPRINT_REG(noblock);
  if(!hasrunning && !noblock) {
    gcphase = SUBTLECOMPACTPHASE;
    compact2Heaptop();
  }
} // void resolvePendingMoveRequest()
struct moveHelper {
  int numblocks;    // block num for heap
  INTPTR base;      // base virtual address of current heap block
  INTPTR ptr;       // virtual address of current heap top
  int offset;       // offset in current heap block
  int blockbase;    // virtual address of current small block to check
  int blockbound;   // bound virtual address of current small block
  int sblockindex;  // index of the small blocks
  int top;          // real size of current heap block to check
  int bound;        // bound size of current heap block to check
}; // struct moveHelper
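
// Compaction walks the heap with two moveHelper cursors: 'orig' scans the
// source blocks for live data while 'to' tracks the destination where
// marked objects are copied.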
// If out of boundary of valid shared memory, return false, else return true
inline bool nextSBlock(struct moveHelper * orig) {
  orig->blockbase = orig->blockbound;
  bool sbchanged = false;
  BAMBOO_DEBUGPRINT(0xecc0);
  BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  BAMBOO_DEBUGPRINT_REG(orig->bound);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
outernextSBlock:
  // check if we cross a big block boundary
  // TODO now do not zero out the whole memory, maybe the last two conditions
  // are useless now
  if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
     || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
     || ((*((int*)orig->blockbase))==0)) {
innernextSBlock:
    // end of current heap block, jump to next one
    orig->numblocks++;
    BAMBOO_DEBUGPRINT(0xecc1);
    BAMBOO_DEBUGPRINT_REG(orig->numblocks);
    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
    BAMBOO_DEBUGPRINT(orig->base);
    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
      // out of boundary
      orig->ptr = orig->base; // set current ptr to out of boundary too
      return false;
    }
    //orig->bound = orig->base + BAMBOO_SMEM_SIZE;
    orig->blockbase = orig->base;
    orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
    sbchanged = true;
    int blocknum = 0;
    BLOCKINDEX(orig->base, &blocknum);
    if(bamboo_smemtbl[blocknum] == 0) {
      // goto the next block
      goto innernextSBlock;
    }
    // check the bamboo_smemtbl to decide the real bound
    orig->bound = orig->base + bamboo_smemtbl[blocknum];
  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
    orig->sblockindex += 1;
    sbchanged = true;
  } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...

  // check if this sblock should be skipped or have special start point
  if(gcsbstarttbl[orig->sblockindex] == -1) {
    // goto the next sblock
    BAMBOO_DEBUGPRINT(0xecc2);
    orig->sblockindex += 1;
    orig->blockbase += BAMBOO_SMEM_SIZE;
    goto outernextSBlock;
  } else if((gcsbstarttbl[orig->sblockindex] != 0)
            && (sbchanged)) {
    // the first time to access this SBlock
    BAMBOO_DEBUGPRINT(0xecc3);
    // does not start from the very beginning
    orig->blockbase = gcsbstarttbl[orig->sblockindex];
  } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...

  // setup information for this sblock
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  BAMBOO_DEBUGPRINT(0xecc4);
  BAMBOO_DEBUGPRINT_REG(orig->base);
  BAMBOO_DEBUGPRINT_REG(orig->bound);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  BAMBOO_DEBUGPRINT_REG(orig->offset);
  if(orig->ptr >= orig->bound) {
    // met a lobj, move to next block
    goto innernextSBlock;
  }
  return true;
} // bool nextSBlock(struct moveHelper * orig)
// return false if there is no available data to compact
inline bool initOrig_Dst(struct moveHelper * orig,
                         struct moveHelper * to) {
  // init the dst ptr
  to->numblocks = 0;
  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->bound = BAMBOO_SMEM_SIZE_L;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));

  BAMBOO_DEBUGPRINT(0xef01);
  BAMBOO_DEBUGPRINT_REG(to->base);
  to->ptr = to->base + to->offset;
#ifdef GC_CACHE_ADAPT
  // initialize the gc_cache_revise_information
  gc_cache_revise_infomation.to_page_start_va = to->ptr;
  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
    ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
  gc_cache_revise_infomation.to_page_index =
    (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
  gc_cache_revise_infomation.orig_page_start_va = -1;
#endif // GC_CACHE_ADAPT

  // init the orig ptr
  orig->numblocks = 0;
  orig->base = to->base;
  int blocknum = 0;
  BLOCKINDEX(orig->base, &blocknum);
  // check the bamboo_smemtbl to decide the real bound
  orig->bound = orig->base + bamboo_smemtbl[blocknum];
  orig->blockbase = orig->base;
  orig->sblockindex = (orig->base - gcbaseva) / BAMBOO_SMEM_SIZE;
  BAMBOO_DEBUGPRINT(0xef02);
  BAMBOO_DEBUGPRINT_REG(orig->base);
  BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
  BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
  BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);

  if(gcsbstarttbl[orig->sblockindex] == -1) {
    BAMBOO_DEBUGPRINT(0xef03);
    // goto the next sblock
    orig->blockbound =
      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
    return nextSBlock(orig);
  } else if(gcsbstarttbl[orig->sblockindex] != 0) {
    BAMBOO_DEBUGPRINT(0xef04);
    orig->blockbase = gcsbstarttbl[orig->sblockindex];
  }
  BAMBOO_DEBUGPRINT(0xef05);
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  BAMBOO_DEBUGPRINT(0xef06);
  BAMBOO_DEBUGPRINT_REG(orig->base);
  return true;
} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
inline void nextBlock(struct moveHelper * to) {
  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
  to->bound += BAMBOO_SMEM_SIZE;
  to->numblocks++;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
  to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->ptr = to->base + to->offset;
} // void nextBlock(struct moveHelper * to)
// endaddr does not contain spaces for headers
inline bool moveobj(struct moveHelper * orig,
                    struct moveHelper * to,
                    int stopblock) {
  if(stopblock == 0) {
    return true;
  }

  BAMBOO_DEBUGPRINT(0xe201);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(to->ptr);

  int type = 0;
  int size = 0;
  int mark = 0;
  int isize = 0;
innermoveobj:
  // skip over the -2 padding bytes
  while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
    orig->ptr = (int*)(orig->ptr) + 1;
  }
#ifdef GC_CACHE_ADAPT
  if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
    // end of an orig page:
    // compute the impact of this page on the new page
    float tmp_factor =
      ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va))/
      ((float)(BAMBOO_PAGE_SIZE));
    for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
      ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
       )[gc_cache_revise_infomation.to_page_index] += (int)(
        ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
          gc_cache_revise_infomation.orig_page_index]*tmp_factor);
    }
    /* if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
       tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
       } */
    // prepare for a new orig page
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
    gc_cache_revise_infomation.orig_page_index =
      (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
  }
#endif // GC_CACHE_ADAPT
  if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  }
  BAMBOO_DEBUGPRINT(0xe202);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
  // check the obj's type, size and mark flag
  type = ((int *)(orig->ptr))[0];
  size = 0;
  if(type == 0) {
    // end of this block, go to next one
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  } else if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  }
  mark = ((int *)(orig->ptr))[6];
  bool isremote = ((((int *)(orig->ptr))[6] & REMOTEM) != 0);
  BAMBOO_DEBUGPRINT(0xe203);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(size);
  ALIGNSIZE(size, &isize); // whether the obj is marked or not, we
                           // should be able to step across it
  if((mark & MARKED) != 0) {
    BAMBOO_DEBUGPRINT(0xe204);
    // marked obj, copy it to current heap top
    // check to see if remaining space is enough
    if(to->top + isize > to->bound) {
      // fill 0 indicating the end of this block
      BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top);
      // fill the header of this block and then go to next block
      to->offset += to->bound - to->top;
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
#ifdef GC_CACHE_ADAPT
      int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
      nextBlock(to);
#ifdef GC_CACHE_ADAPT
      if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
        // end of a to-page, wrap up its information
        float tmp_factor =
          ((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va))/
          ((float)(BAMBOO_PAGE_SIZE));
        for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
          ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
           )[gc_cache_revise_infomation.to_page_index] += (int)(
            ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
              gc_cache_revise_infomation.orig_page_index]*tmp_factor);
        }
        /* if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
           tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
           } */
        // prepare for a new to-page
        gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
        gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
          (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
        gc_cache_revise_infomation.orig_page_index =
          (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
        gc_cache_revise_infomation.to_page_start_va = to->ptr;
        gc_cache_revise_infomation.to_page_end_va = gcbaseva +
          (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
        gc_cache_revise_infomation.to_page_index =
          (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
      }
#endif // GC_CACHE_ADAPT
2070 // already fulfilled the block
2072 } // if(stopblock == to->numblocks)
2073 } // if(to->top + isize > to->bound)
2074 // set the mark field to 2, indicating that this obj has been moved
2075 // and need to be flushed
2076 ((int *)(orig->ptr))[6] = COMPACTED;
2077 if(to->ptr != orig->ptr) {
2078 if((int)(orig->ptr) < (int)(to->ptr)+size) {
2079 memmove(to->ptr, orig->ptr, size);
2081 //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
2082 memcpy(to->ptr, orig->ptr, size);
2084 // fill the remaining space with -2
2085 BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
2087 // store mapping info
2088 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2089 #ifdef LOCALHASHTBL_TEST
2090 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2092 mgchashInsert_I(gcpointertbl, orig->ptr, to->ptr);
2094 //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2096 // add to the sharedptbl
2097 if(gcsharedptbl != NULL) {
2098 //GCSharedHashadd_I(gcsharedptbl, orig->ptr, to->ptr);
2099 mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
2102 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2105 BAMBOO_DEBUGPRINT(0xcdce);
2106 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2107 BAMBOO_DEBUGPRINT_REG(to->ptr);
2108 BAMBOO_DEBUGPRINT_REG(isize);
2110 gccurr_heaptop -= isize;
2112 to->offset += isize;
2114 #ifdef GC_CACHE_ADAPT
2115 int tmp_ptr = to->ptr;
2116 #endif // GC_CACHE_ADAPT
2117 if(to->top == to->bound) {
2118 // fill the header of this block and then go to next block
2119 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2120 (*((int*)(to->base))) = to->offset;
2123 #ifdef GC_CACHE_ADAPT
2124 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2125 // end of an to page, wrap up its information
2127 ((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va))/
2128 ((float)(BAMBOO_PAGE_SIZE));
2129 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2130 ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
2131 )[gc_cache_revise_infomation.to_page_index] += (int)(
2132 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2133 gc_cache_revise_infomation.orig_page_index]*tmp_factor);
2135 /* if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
2136 tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
2139 // prepare for an new to page
2140 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2141 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2142 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2143 gc_cache_revise_infomation.orig_page_index =
2144 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2145 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2146 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2147 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2148 gc_cache_revise_infomation.to_page_index =
2149 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2151 #endif // GC_CACHE_ADAPT
2154 BAMBOO_DEBUGPRINT(0xe205);
2160 BAMBOO_DEBUGPRINT_REG(isize);
2161 BAMBOO_DEBUGPRINT_REG(size);
2162 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2163 BAMBOO_DEBUGPRINT_REG(orig->bound);
2165 if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
2167 BAMBOO_DEBUGPRINT(0xe206);
2169 if(!nextSBlock(orig)) {
2170 // finished, no more data
2175 BAMBOO_DEBUGPRINT(0xe207);
2176 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2179 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
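// Mark-word lifecycle, as implied by the code in this file: word 6 of an
// object's header is set to COMPACTED above once the obj has been copied,
// and flush() below restores it to INIT after the obj's pointers have been
// rewritten (the mark itself is set earlier, during the mark phase).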
2181 // should be invoked with interrupts disabled
2182 inline int assignSpareMem_I(int sourcecore,
2187 BLOCKINDEX(gcloads[sourcecore], &b);
2188 int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
2189 : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2190 int remain = boundptr - gcloads[sourcecore];
2191 int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2192 *startaddr = gcloads[sourcecore];
2193 *tomove = gcfilledblocks[sourcecore] + 1;
2194 if(memneed < remain) {
2195 gcloads[sourcecore] += memneed;
2196 return 0;
2197 } else {
2198 // next available block
2199 gcfilledblocks[sourcecore] += 1;
2201 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2202 gcloads[sourcecore] = newbase;
2203 return requiredmem-remain;
2204 }
2205 } // int assignSpareMem_I(int ,int * , int * , int * )
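// A minimal sketch of the two-tier bound computation used above, assuming
// the layout implied by this file: the first NUMCORES4GC blocks have size
// BAMBOO_SMEM_SIZE_L and later blocks have size BAMBOO_SMEM_SIZE, starting
// at BAMBOO_LARGE_SMEM_BOUND. All numbers below are made up for
// illustration; this block is not part of the runtime.
#if 0
#include <assert.h>
static int block_bound(int b, int ncores4gc, int smem_l, int smem,
                       int large_bound) {
  // end offset (relative to gcbaseva) of block b
  return (b < ncores4gc) ? ((b+1)*smem_l)
                         : (large_bound + (b-ncores4gc+1)*smem);
}
static void block_bound_example(void) {
  int large_bound = 4*0x10000; // 4 gc cores, 64K large blocks, 16K blocks after
  assert(block_bound(0, 4, 0x10000, 0x4000, large_bound) == 0x10000);
  assert(block_bound(4, 4, 0x10000, 0x4000, large_bound)
         == large_bound + 0x4000);
}
#endif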
2207 // should be invoked with interrupts disabled
2208 inline bool gcfindSpareMem_I(int * startaddr,
2213 for(int k = 0; k < NUMCORES4GC; k++) {
2214 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2215 // check if this stopped core has enough mem
2216 assignSpareMem_I(k, requiredmem, tomove, startaddr);
2221 // if no spare mem can be found right now, hold the request
2222 gcrequiredmems[requiredcore] = requiredmem;
2225 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
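// A minimal sketch of the search-or-park pattern used above; the array
// names mirror the globals in this file, but the function is illustrative
// only, not part of the runtime.
#if 0
static int find_idle_core(const int *corestatus, const int *filledblocks,
                          const int *stopblock, int ncores) {
  for(int k = 0; k < ncores; k++) {
    // a stalled core whose blocks are not yet full can donate space
    if((corestatus[k] == 0) && (filledblocks[k] < stopblock[k])) {
      return k;
    }
  }
  return -1; // no spare mem right now: caller parks the request
}
#endif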
2227 inline bool compacthelper(struct moveHelper * orig,
2228 struct moveHelper * to,
2231 bool * localcompact) {
2232 // scan over all objs in this block and compact the marked objs;
2233 // the loop stops when either all active objs have been scanned or
2234 // the gcstopblock has been filled
2236 BAMBOO_DEBUGPRINT(0xe101);
2237 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2238 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2241 while(orig->ptr < gcmarkedptrbound) {
2242 bool stop = moveobj(orig, to, gcblock2fill);
2247 #ifdef GC_CACHE_ADAPT
2248 // end of a 'to' page: wrap up its sampling information
2249 float tmp_factor =
2250 ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va))/
2251 ((float)(BAMBOO_PAGE_SIZE));
2252 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2253 ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
2254 )[gc_cache_revise_infomation.to_page_index] += (int)(
2255 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2256 gc_cache_revise_infomation.orig_page_index]*tmp_factor);
2258 #endif // GC_CACHE_ADAPT
2259 // if no objs have been compacted, do nothing;
2260 // otherwise, fill in the header of this block
2261 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2262 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2263 (*((int*)(to->base))) = to->offset;
2267 to->top -= BAMBOO_CACHE_LINE_SIZE;
2268 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2270 *heaptopptr = to->ptr;
2271 *filledblocks = to->numblocks;
2274 BAMBOO_DEBUGPRINT(0xe102);
2275 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2276 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2277 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2278 BAMBOO_DEBUGPRINT_REG(*filledblocks);
2279 BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2282 // send a compact finish message to the core coordinator,
2283 // indicating that compaction is finishing
2284 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2285 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2286 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2287 if(orig->ptr < gcmarkedptrbound) {
2289 BAMBOO_DEBUGPRINT(0xe103);
2293 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2294 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2295 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2297 BAMBOO_DEBUGPRINT(0xe104);
2301 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2303 BAMBOO_DEBUGPRINT(0xe105);
2307 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2310 BAMBOO_DEBUGPRINT(0xe106);
2312 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2317 if(orig->ptr < gcmarkedptrbound) {
2319 BAMBOO_DEBUGPRINT(0xe107);
2323 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2324 *filledblocks, *heaptopptr, gccurr_heaptop, false);
2327 BAMBOO_DEBUGPRINT(0xe108);
2328 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2330 // finish compacting
2331 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2332 *filledblocks, *heaptopptr, 0, false);
2334 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2336 if(orig->ptr < gcmarkedptrbound) {
2338 BAMBOO_DEBUGPRINT(0xe109);
2340 // still have uncompacted objs
2349 BAMBOO_DEBUGPRINT(0xe10a);
2352 to->ptr = gcmovestartaddr;
2353 to->numblocks = gcblock2fill - 1;
2354 to->bound = (to->numblocks==0) ?
2355 BAMBOO_SMEM_SIZE_L :
2356 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2357 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2358 to->offset = to->ptr - to->base;
2359 to->top = (to->numblocks==0) ?
2360 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2362 to->offset = BAMBOO_CACHE_LINE_SIZE;
2363 to->ptr += to->offset; // for header
2364 to->top += to->offset;
2365 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2366 *localcompact = true;
2368 *localcompact = false;
2370 #ifdef GC_CACHE_ADAPT
2371 // initialize the gc_cache_revise_information
2372 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2373 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2374 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2375 gc_cache_revise_infomation.to_page_index =
2376 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2377 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2378 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2379 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2380 gc_cache_revise_infomation.orig_page_index =
2381 orig->blockbase/(BAMBOO_PAGE_SIZE);
2382 #endif // GC_CACHE_ADAPT
2386 BAMBOO_DEBUGPRINT(0xe10b);
2389 } // bool compacthelper()
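// The destination-window arithmetic above (and again in gc_master below) in
// one place, with made-up sizes; assumes block 0 has size BAMBOO_SMEM_SIZE_L
// and blocks 1..n have size BAMBOO_SMEM_SIZE. Illustrative only.
#if 0
static void to_window_example(void) {
  int SMEM_L = 0x10000, SMEM = 0x4000, CACHE_LINE = 64;
  int numblocks = 2;             // writing into the 3rd block
  int start_offset = 0;          // in-block offset of gcmovestartaddr
  int bound = (numblocks == 0) ? SMEM_L : SMEM_L + SMEM*numblocks;
  // 'top' is the write cursor measured like 'bound'; both cursor and start
  // pointer then advance past the BAMBOO_CACHE_LINE_SIZE block header
  int top = (numblocks == 0) ? start_offset : (bound - SMEM + start_offset);
  top += CACHE_LINE;
  (void)bound; (void)top;
}
#endif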
2391 inline void compact() {
2392 if(COMPACTPHASE != gcphase) {
2393 BAMBOO_EXIT(0xb102);
2396 // initialize pointers for compacting
2397 struct moveHelper * orig =
2398 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2399 struct moveHelper * to =
2400 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2402 if(!initOrig_Dst(orig, to)) {
2403 // no available data to compact
2404 // send compact finish msg to STARTUP core
2406 BAMBOO_DEBUGPRINT(0xe001);
2407 BAMBOO_DEBUGPRINT_REG(to->base);
2409 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2410 0, to->base, 0, false);
2415 #ifdef GC_CACHE_ADAPT
2416 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2417 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2418 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2419 gc_cache_revise_infomation.orig_page_index =
2420 orig->blockbase/(BAMBOO_PAGE_SIZE);
2423 int filledblocks = 0;
2424 INTPTR heaptopptr = 0;
2425 bool localcompact = true;
2426 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2432 // if flushObj returns NULL, then either
2433 // 1. objptr is NULL, or
2434 // 2. objptr is not a shared obj;
2435 // in these cases, keeping the original value is OK
2436 inline void * flushObj(void * objptr) {
2438 BAMBOO_DEBUGPRINT(0xe401);
2440 if(objptr == NULL) {
2441 return NULL;
2442 }
2443 void * dstptr = NULL;
2444 if(ISSHAREDOBJ(objptr)) {
2446 BAMBOO_DEBUGPRINT(0xe402);
2447 BAMBOO_DEBUGPRINT_REG(objptr);
2449 // a shared obj ptr, change to new address
2450 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2452 //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
2454 #ifdef LOCALHASHTBL_TEST
2455 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2456 #else
2457 dstptr = mgchashSearch(gcpointertbl, objptr);
2458 #endif
2459 //MGCHashget(gcpointertbl, objptr, &dstptr);
2461 //flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
2463 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2465 BAMBOO_DEBUGPRINT_REG(dstptr);
2468 if(NULL == dstptr) {
2471 BAMBOO_DEBUGPRINT(0xe403);
2472 BAMBOO_DEBUGPRINT_REG(objptr);
2473 BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2475 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2476 // error! the obj is right on this core, but cannot find it
2477 //BAMBOO_DEBUGPRINT(0xecec);
2478 BAMBOO_DEBUGPRINT_REG(objptr);
2479 BAMBOO_EXIT(0xb103);
2480 // assume that the obj has not been moved, use the original address
2483 int hostc = hostcore(objptr);
2485 //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
2487 // check the corresponding sharedptbl
2488 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2489 //struct GCSharedHash * sptbl = gcrpointertbls[hostcore(objptr)];
2490 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2492 //GCSharedHashget(sptbl, (int)objptr, &dstptr);
2493 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2494 if(dstptr != NULL) {
2495 #ifdef LOCALHASHTBL_TEST
2496 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2497 #else
2498 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2499 #endif
2500 }
2502 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2504 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
2507 if(dstptr == NULL) {
2508 // still cannot get the mapping info,
2509 // send a msg to the host core for the mapping info
2510 gcobj2map = (int)objptr;
2513 // the first time requesting the mapping: send a msg to the host core
2514 // for the mapping info
2515 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2516 BAMBOO_NUM_OF_CORE, false);
2523 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
2525 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2526 #ifdef LOCALHASHTBL_TEST
2527 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2528 #else
2529 dstptr = mgchashSearch(gcpointertbl, objptr);
2530 #endif
2531 //MGCHashget(gcpointertbl, objptr, &dstptr);
2532 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2533 } // if(dstptr == NULL)
2534 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2536 BAMBOO_DEBUGPRINT_REG(dstptr);
2538 } // if(NULL == dstptr)
2539 } // if(ISSHAREDOBJ(objptr))
2540 // if not a shared obj, return NULL to indicate no need to flush
2542 BAMBOO_DEBUGPRINT(0xe404);
2544 return dstptr;
2545 } // void * flushObj(void * objptr)
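// Every caller below uses the same pattern: only overwrite a slot when
// flushObj() returns a non-NULL forwarding address. A minimal sketch
// (illustrative only, not part of the runtime):
#if 0
static void flush_slot(void ** slot) {
  void * dst = flushObj(*slot);
  if(dst != NULL) {
    *slot = dst; // moved shared obj: install its new address
  } // else: NULL or non-shared obj, keep the original value
}
#endif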
2547 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2549 // flush current stack
2550 while(stackptr!=NULL) {
2551 for(i=0; i<stackptr->size; i++) {
2552 if(stackptr->array[i] != NULL) {
2553 void * dst = flushObj(stackptr->array[i]);
2555 stackptr->array[i] = dst;
2559 stackptr=stackptr->next;
2563 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2564 for(i=0; i<NUMCLASSES; i++) {
2565 struct parameterwrapper ** queues =
2566 objectqueues[BAMBOO_NUM_OF_CORE][i];
2567 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2568 for(j = 0; j < length; ++j) {
2569 struct parameterwrapper * parameter = queues[j];
2570 struct ObjectHash * set=parameter->objectset;
2571 struct ObjectNode * ptr=set->listhead;
2573 void * dst = flushObj((void *)ptr->key);
2579 ObjectHashrehash(set);
2584 // flush current task descriptor
2585 if(currtpd != NULL) {
2586 for(i=0; i<currtpd->numParameters; i++) {
2587 void * dst = flushObj(currtpd->parameterArray[i]);
2589 currtpd->parameterArray[i] = dst;
2594 // flush active tasks
2595 if(activetasks != NULL) {
2596 struct genpointerlist * ptr=activetasks->list;
2598 struct taskparamdescriptor *tpd=ptr->src;
2600 for(i=0; i<tpd->numParameters; i++) {
2601 void * dst = flushObj(tpd->parameterArray[i]);
2603 tpd->parameterArray[i] = dst;
2608 genrehash(activetasks);
2611 // flush cached transferred obj
2612 struct QueueItem * tmpobjptr = getHead(&objqueue);
2613 while(tmpobjptr != NULL) {
2614 struct transObjInfo * objInfo =
2615 (struct transObjInfo *)(tmpobjptr->objectptr);
2616 void * dst = flushObj(objInfo->objptr);
2618 objInfo->objptr = dst;
2620 tmpobjptr = getNextQueueItem(tmpobjptr);
2623 // flush cached objs to be transferred
2624 struct QueueItem * item = getHead(totransobjqueue);
2625 while(item != NULL) {
2626 struct transObjInfo * totransobj =
2627 (struct transObjInfo *)(item->objectptr);
2628 void * dst = flushObj(totransobj->objptr);
2630 totransobj->objptr = dst;
2632 item = getNextQueueItem(item);
2633 } // while(item != NULL)
2635 // flush lock related info
2636 for(i = 0; i < runtime_locklen; ++i) {
2637 void * dst = flushObj(runtime_locks[i].redirectlock);
2639 runtime_locks[i].redirectlock = (int)dst;
2641 if(runtime_locks[i].value != NULL) {
2642 void * dst=flushObj(runtime_locks[i].value);
2644 runtime_locks[i].value = (int)dst;
2649 } // void flushRuntimeObj(struct garbagelist * stackptr)
2651 inline void transmappinginfo() {
2652 // broadcast the sharedptbl pointer
2653 for(int i = 0; i < NUMCORESACTIVE; i++) {
2654 if(i != BAMBOO_NUM_OF_CORE) {
2655 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2659 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2660 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
2664 inline void flush(struct garbagelist * stackptr) {
2666 flushRuntimeObj(stackptr);
2669 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2670 bool hasItems = gc_moreItems_I();
2671 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2677 BAMBOO_DEBUGPRINT(0xe301);
2679 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2680 void * ptr = gc_dequeue_I();
2681 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2682 if(ISSHAREDOBJ(ptr)) {
2683 // should be a local shared obj and should have mapping info
2684 ptr = flushObj(ptr);
2686 BAMBOO_DEBUGPRINT(0xe302);
2687 BAMBOO_DEBUGPRINT_REG(ptr);
2688 BAMBOO_DEBUGPRINT_REG(tptr);
2689 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2692 BAMBOO_EXIT(0xb105);
2694 } // if(ISSHAREDOBJ(ptr))
2695 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2696 int type = ((int *)(ptr))[0];
2697 // scan all pointers in ptr
2698 unsigned INTPTR * pointer;
2699 pointer=pointerarray[type];
2701 BAMBOO_DEBUGPRINT(0xe303);
2702 BAMBOO_DEBUGPRINT_REG(pointer);
2705 /* Array of primitives */
2707 } else if (((INTPTR)pointer)==1) {
2709 BAMBOO_DEBUGPRINT(0xe304);
2711 /* Array of pointers */
2712 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2713 int length=ao->___length___;
2715 for(j=0; j<length; j++) {
2717 BAMBOO_DEBUGPRINT(0xe305);
2720 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2722 BAMBOO_DEBUGPRINT_REG(objptr);
2724 if(objptr != NULL) {
2725 void * dst = flushObj(objptr);
2727 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2733 BAMBOO_DEBUGPRINT(0xe306);
2735 INTPTR size=pointer[0];
2737 for(i=1; i<=size; i++) {
2739 BAMBOO_DEBUGPRINT(0xe307);
2741 unsigned int offset=pointer[i];
2742 void * objptr=*((void **)(((char *)ptr)+offset));
2744 BAMBOO_DEBUGPRINT_REG(objptr);
2746 if(objptr != NULL) {
2747 void * dst = flushObj(objptr);
2749 *((void **)(((char *)ptr)+offset)) = dst;
2752 } // for(i=1; i<=size; i++)
2753 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2754 // restore the mark field, indicating that this obj has been flushed
2755 if(ISSHAREDOBJ(ptr)) {
2756 ((int *)(ptr))[6] = INIT;
2758 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2759 } // while(gc_moreItems())
2761 BAMBOO_DEBUGPRINT(0xe308);
2764 // TODO bug here: the startup core contains all lobjs' info, thus all the
2765 // lobjs are flushed in sequence.
2767 while(gc_lobjmoreItems_I()) {
2769 BAMBOO_DEBUGPRINT(0xe309);
2771 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2772 ptr = flushObj(ptr);
2774 BAMBOO_DEBUGPRINT(0xe30a);
2775 BAMBOO_DEBUGPRINT_REG(ptr);
2776 BAMBOO_DEBUGPRINT_REG(tptr);
2777 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2780 BAMBOO_EXIT(0xb106);
2782 if(((int *)(ptr))[6] == COMPACTED) {
2783 int type = ((int *)(ptr))[0];
2784 // scan all pointers in ptr
2785 unsigned INTPTR * pointer;
2786 pointer=pointerarray[type];
2788 BAMBOO_DEBUGPRINT(0xe30b);
2789 BAMBOO_DEBUGPRINT_REG(pointer);
2792 /* Array of primitives */
2794 } else if (((INTPTR)pointer)==1) {
2796 BAMBOO_DEBUGPRINT(0xe30c);
2798 /* Array of pointers */
2799 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2800 int length=ao->___length___;
2802 for(j=0; j<length; j++) {
2804 BAMBOO_DEBUGPRINT(0xe30d);
2807 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2809 BAMBOO_DEBUGPRINT_REG(objptr);
2811 if(objptr != NULL) {
2812 void * dst = flushObj(objptr);
2814 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2820 BAMBOO_DEBUGPRINT(0xe30e);
2822 INTPTR size=pointer[0];
2824 for(i=1; i<=size; i++) {
2826 BAMBOO_DEBUGPRINT(0xe30f);
2828 unsigned int offset=pointer[i];
2829 void * objptr=*((void **)(((char *)ptr)+offset));
2832 BAMBOO_DEBUGPRINT_REG(objptr);
2834 if(objptr != NULL) {
2835 void * dst = flushObj(objptr);
2837 *((void **)(((char *)ptr)+offset)) = dst;
2840 } // for(i=1; i<=size; i++)
2841 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2842 // restore the mark field, indicating that this obj has been flushed
2843 ((int *)(ptr))[6] = INIT;
2844 } // if(((int *)(ptr))[6] == COMPACTED)
2845 } // while(gc_lobjmoreItems())
2847 BAMBOO_DEBUGPRINT(0xe310);
2850 // send flush finish message to core coordinator
2851 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2852 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2853 } else {
2854 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2855 }
2857 BAMBOO_DEBUGPRINT(0xe311);
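// The pointerarray[type] convention walked twice above, as implied by the
// code: the entry is 0 for an array of primitives, 1 for an array of
// pointers, and otherwise points at a table whose slot 0 is the number of
// reference fields and slots 1..size are their byte offsets within the
// object. A sketch with unsigned long standing in for INTPTR
// (illustrative only):
#if 0
static void visit_refs(void * ptr, unsigned long * pointer,
                       void (*visit)(void ** slot)) {
  if(pointer == 0) return;                  // array of primitives
  if(((unsigned long)pointer) == 1) return; // array of pointers: walk the
                                            // ao->___length___ slots instead
  unsigned long size = pointer[0];
  for(unsigned long i = 1; i <= size; i++) {
    visit((void **)(((char *)ptr) + pointer[i]));
  }
}
#endif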
2861 #ifdef GC_CACHE_ADAPT
2862 // prepare for cache adaption:
2863 // -- flush the shared heap
2864 // -- clean dtlb entries
2865 // -- change cache strategy
2866 void cacheAdapt_gc(bool isgccachestage) {
2867 // flush the shared heap
2868 BAMBOO_CACHE_FLUSH_L2();
2870 // clean the dtlb entries
2871 BAMBOO_CLEAN_DTLB();
2873 // change the cache strategy
2874 gccachestage = isgccachestage;
2875 } // cacheAdapt_gc(bool isgccachestage)
2877 // the master core decides how to adapt the cache strategy for the mutator
2878 // according to the collected statistics
2879 extern int gc_num_sampling;
2881 // make all pages hash-for-home (hfh)
2882 int cacheAdapt_policy_h4h(){
2883 unsigned int page_index = 0;
2885 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2887 int * tmp_p = gccachepolicytbl+1;
2888 for(page_index = 0; page_index < page_num; page_index++) {
2889 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2890 bamboo_cache_policy_t policy = {0};
2891 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2892 *tmp_p = page_index;
2894 *tmp_p = policy.word;
2900 } // int cacheAdapt_policy_h4h()
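// All cacheAdapt_policy_* functions here emit the same record stream that
// cacheAdapt_mutator() consumes: gccachepolicytbl[0] holds the number of
// records (set by cacheAdapt_master()), followed by (page index, policy
// word) pairs. A minimal encoder sketch (illustrative only):
#if 0
static int emit_policies(int * tbl, const int * pages, const int * words,
                         int n) {
  int * tmp_p = tbl + 1;
  for(int k = 0; k < n; k++) {
    *tmp_p++ = pages[k]; // which page changes
    *tmp_p++ = words[k]; // its new bamboo_cache_policy_t word
  }
  tbl[0] = n;            // header read back by the mutator side
  return n;
}
#endif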
2902 // make all pages local, as in the non-cache-adaptable gc local mode
2903 int cacheAdapt_policy_local(){
2904 unsigned int page_index = 0;
2906 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2908 int * tmp_p = gccachepolicytbl+1;
2909 for(page_index = 0; page_index < page_num; page_index++) {
2910 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2911 bamboo_cache_policy_t policy = {0};
2913 BLOCKINDEX(page_sva, &block);
2914 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2915 // locally cache the page on the core hosting this block
2916 // NOTE: (x,y) has to be offset to (x+1, y+1)!!!
2917 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2918 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2919 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2920 *tmp_p = page_index;
2922 *tmp_p = policy.word;
2928 } // int cacheAdapt_policy_local()
2930 int cacheAdapt_policy_hotest(){
2931 unsigned int page_index = 0;
2933 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2935 int * tmp_p = gccachepolicytbl+1;
2936 for(page_index = 0; page_index < page_num; page_index++) {
2937 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2938 bamboo_cache_policy_t policy = {0};
2942 for(int i = 0; i < NUMCORESACTIVE; i++) {
2943 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
2944 +page_num*sizeof(float)*i);
2945 int freq = local_tbl[page_index];
2947 // check the frequency to decide if this page is hot for this core
2948 if(hotfreq < freq) {
2954 // Decide the cache strategy for this page.
2955 // If a new cache strategy is adopted, write it into the shared block
2956 // of the gcsharedsamplingtbl; the mem reserved for recording this
2957 // information is large enough to hold it.
2958 // Format: page start va + cache strategy (hfh / (host core + [x,y]))
2960 // this page has not been accessed, do not change its cache policy
2963 // locally cache the page on the hottest core
2964 // NOTE: (x,y) has to be offset to (x+1, y+1)!!!
2965 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2966 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2967 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2968 *tmp_p = page_index;
2970 *tmp_p = policy.word;
2977 } // int cacheAdapt_policy_hotest()
2979 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
2980 // cache the page on the core that accesses it the most, if that core
2981 // accounts for more than GC_CACHE_ADAPT_DOMINATE_THRESHOLD% of the
2982 // total accesses; otherwise hash-for-home the page
2983 int cacheAdapt_policy_dominate(){
2984 unsigned int page_index = 0;
2986 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2988 int * tmp_p = gccachepolicytbl+1;
2989 for(page_index = 0; page_index < page_num; page_index++) {
2990 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2991 bamboo_cache_policy_t policy = {0};
2996 for(int i = 0; i < NUMCORESACTIVE; i++) {
2997 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
2998 +page_num*sizeof(float)*i);
2999 int freq = local_tbl[page_index];
3002 // check the frequency to decide if this page is hot for this core
3003 if(hotfreq < freq) {
3008 // Decide the cache strategy for this page.
3009 // If a new cache strategy is adopted, write it into the shared block
3010 // of the gcsharedsamplingtbl.
3011 // Format: page start va + cache policy
3013 // this page has not been accessed, do not change its cache policy
3016 totalfreq = (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100;
3017 if(hotfreq < totalfreq) {
3019 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3021 // locally cache the page on the hottest core
3022 // NOTE: (x,y) has to be offset to (x+1, y+1)!!!
3023 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3024 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3025 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3027 *tmp_p = page_index;
3029 *tmp_p = policy.word;
3035 } // int cacheAdapt_policy_dominate()
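// The dominance test above in numbers (made up): with per-core samples
// {40, 35, 25}, totalfreq = 100 and hotfreq = 40; the scaled threshold is
// 100*50/100 = 50 and 40 < 50, so no core dominates and the page is
// hashed-for-home instead of being pinned to a core. Illustrative only:
#if 0
static int dominates(int hotfreq, int totalfreq, int threshold_pct) {
  return hotfreq >= (totalfreq*threshold_pct)/100;
}
// dominates(40, 100, 50) == 0 -> hash-for-home
// dominates(60, 100, 50) == 1 -> pin to the hottest core
#endif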
3037 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 1000
3039 void gc_quicksort(int *array,
3045 int rightIdx = right;
3046 if((right-left+1) >= 1) {
3047 pivot = (left+right)/2;
3048 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
3049 int pivotValue = array[pivot*3-offset];
3050 while((array[leftIdx*3-offset] < pivotValue) && (leftIdx <= pivot)) {
3053 while((array[rightIdx*3-offset] > pivotValue) && (rightIdx >= pivot)) {
3056 // swap [leftIdx] & [rightIdx]
3057 for(int k = 0; k < 3; k++) {
3058 int tmp = array[3*rightIdx-k];
3059 array[3*rightIdx-k] = array[3*leftIdx-k];
3060 array[3*leftIdx-k] = tmp;
3064 if((leftIdx-1) == pivot) {
3065 pivot = rightIdx = rightIdx + 1;
3066 } else if((leftIdx+1) == pivot) {
3067 pivot = leftIdx = leftIdx-1;
3070 gc_quicksort(array, left, pivot-1, offset);
3071 gc_quicksort(array, pivot+1, right, offset);
3074 } // void gc_quicksort(...)
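// gc_quicksort() above sorts the stride-3 records kept per core in
// core2heavypages below: slot 0 is the record count, and record r (1-based)
// occupies slots {3r-2: policy-table address, 3r-1: totalfreq,
// 3r: remoteaccess}. The key is array[r*3-offset], so offset 0 sorts
// ascending by remote accesses and offset 1 by total accesses, and each
// swap moves the whole triple. A made-up example (illustrative only):
#if 0
static void heavy_pages_sort_example(void) {
  int pages[1+3*2] = { 2,
    /* rec 1: addr, total, remote */ 0x100, 90, 40,
    /* rec 2: addr, total, remote */ 0x200, 50, 10 };
  gc_quicksort(pages, 1, 2, 0); // sort by remote accesses
  // now rec 1 is the 0x200 record, since 10 < 40
}
#endif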
3076 // Cache every page on the core that accesses it the most.
3077 // Check to see if any core's pages total more accesses than
3078 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3079 // most remote accesses and hash-for-home them until we get below
3080 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3081 int cacheAdapt_policy_overload(){
3082 unsigned int page_index = 0;
3084 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3086 int * tmp_p = gccachepolicytbl+1;
3087 int workload[NUMCORESACTIVE];
3088 memset(workload, 0, NUMCORESACTIVE*sizeof(int));
3089 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3090 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3091 for(page_index = 0; page_index < page_num; page_index++) {
3092 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3093 bamboo_cache_policy_t policy = {0};
3098 for(int i = 0; i < NUMCORESACTIVE; i++) {
3099 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3100 +page_num*sizeof(float)*i);
3101 int freq = local_tbl[page_index];
3104 // check the frequency to decide if this page is hot for this core
3105 if(hotfreq < freq) {
3111 // Decide the cache strategy for this page.
3112 // If a new cache strategy is adopted, write it into the shared block
3113 // of the gcsharedsamplingtbl; the mem reserved for recording this
3114 // information is large enough to hold it.
3115 // Format: page start va + cache strategy (hfh / (host core + [x,y]))
3117 // this page has not been accessed, do not change its cache policy
3120 // locally cache the page on the hottest core
3121 // NOTE: (x,y) has to be offset to (x+1, y+1)!!!
3122 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3123 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3124 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3125 *tmp_p = page_index;
3127 *tmp_p = policy.word;
3130 workload[hotestcore] += totalfreq;
3131 // insert into core2heavypages using quicksort
3132 int remoteaccess = totalfreq - hotfreq;
3133 int index = core2heavypages[hotestcore][0];
3134 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3135 core2heavypages[hotestcore][3*index+2] = totalfreq;
3136 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3137 core2heavypages[hotestcore][0]++;
3140 // Check the workload of each core
3141 for(int i = 0; i < NUMCORESACTIVE; i++) {
3142 int j = 1;
3143 int index = core2heavypages[i][0];
3144 if(workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) {
3145 // sort according to the remoteaccess
3146 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3147 while((workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) && (j<index*3)) {
3148 // hfh those pages with more remote accesses
3149 bamboo_cache_policy_t policy = {0};
3150 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3151 *((int*)core2heavypages[i][j]) = policy.word;
3152 workload[i] -= core2heavypages[i][j+1];
3159 } // int cacheAdapt_policy_overload()
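// Overload in numbers (made up): with GC_CACHE_ADAPT_OVERLOAD_THRESHOLD =
// 1000, suppose core 3 is the hottest core for pages totalling workload[3]
// = 1400 accesses. The loop above then walks core 3's heavy-page records,
// switching pages to hash-for-home and subtracting each page's totalfreq,
// until workload[3] drops to 1000 or below.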
3161 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3162 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 10
3163 // Cache every page on the core that accesses it the most.
3164 // Check to see if any core's pages total more accesses than
3165 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3166 // most remote accesses and hash-for-home them until we get below
3167 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3168 // Sort pages based on activity....
3169 // If more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
3170 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3171 // then start hashing-for-home these pages (selecting the ones with the
3172 // most remote accesses first or fewest local accesses) until we get
3173 // below GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
3174 int cacheAdapt_policy_crowd(){
3175 unsigned int page_index = 0;
3177 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3179 int * tmp_p = gccachepolicytbl+1;
3180 int workload[NUMCORESACTIVE];
3181 memset(workload, 0, NUMCORESACTIVE*sizeof(int));
3182 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3183 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3184 for(page_index = 0; page_index < page_num; page_index++) {
3185 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3186 bamboo_cache_policy_t policy = {0};
3191 for(int i = 0; i < NUMCORESACTIVE; i++) {
3192 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3193 +page_num*sizeof(float)*i);
3194 int freq = local_tbl[page_index];
3197 // check the frequency to decide if this page is hot for this core
3198 if(hotfreq < freq) {
3204 // Decide the cache strategy for this page.
3205 // If a new cache strategy is adopted, write it into the shared block
3206 // of the gcsharedsamplingtbl; the mem reserved for recording this
3207 // information is large enough to hold it.
3208 // Format: page start va + cache strategy (hfh / (host core + [x,y]))
3210 // this page has not been accessed, do not change its cache policy
3213 // locally cache the page on the hottest core
3214 // NOTE: (x,y) has to be offset to (x+1, y+1)!!!
3215 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3216 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3217 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3218 *tmp_p = page_index;
3220 *tmp_p = policy.word;
3223 workload[hotestcore] += totalfreq;
3224 // insert into core2heavypages using quicksort
3225 int remoteaccess = totalfreq - hotfreq;
3226 int index = core2heavypages[hotestcore][0];
3227 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3228 core2heavypages[hotestcore][3*index+2] = totalfreq;
3229 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3230 core2heavypages[hotestcore][0]++;
3233 // Check the workload of each core
3234 for(int i = 0; i < NUMCORESACTIVE; i++) {
3235 int j = 1;
3236 int index = core2heavypages[i][0];
3237 if(workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) {
3238 // sort according to the remote access
3239 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3240 while((workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) && (j<index*3)) {
3241 // hfh those pages with more remote accesses
3242 bamboo_cache_policy_t policy = {0};
3243 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3244 *((int*)core2heavypages[i][j]) = policy.word;
3245 workload[i] -= core2heavypages[i][j+1];
3250 // Check if the accesses are crowded onto a few pages;
3251 // sort according to the total accesses
3252 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3253 int threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3254 int num_crowded = 0;
3256 for(;(num_crowded<GC_CACHE_ADAPT_CROWD_THRESHOLD)
3257 &&(num_crowded<(index-j/3)); num_crowded++) {
3258 t_workload += core2heavypages[i][j+num_crowded*3+1];
3260 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD, and if there are
3261 // enough items it is exactly GC_CACHE_ADAPT_CROWD_THRESHOLD
3262 if(t_workload > threshold) {
3264 // need to hfh these pages
3265 // sort the pages according to remote access
3266 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3267 while((num_crowded--) && (j < index*3)) {
3268 // hfh those pages with more remote accesses
3269 bamboo_cache_policy_t policy = {0};
3270 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3271 *((int*)core2heavypages[i][j]) = policy.word;
3272 workload[i] -= core2heavypages[i][j+1];
3273 t_workload -= core2heavypages[i][j+1];
3274 if((j/3+GC_CACHE_ADAPT_CROWD_THRESHOLD) < index) {
3276 core2heavypages[i][j+GC_CACHE_ADAPT_CROWD_THRESHOLD*3+1];
3279 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3280 if(t_workload <= threshold) {
3284 if((j < index*3) && (t_workload > threshold)) {
3285 num_crowded = ((index-j/3) > GC_CACHE_ADAPT_CROWD_THRESHOLD) ?
3286 (GC_CACHE_ADAPT_CROWD_THRESHOLD) : (index-j/3);
3293 } // int cacheAdapt_policy_crowd()
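// Crowd in numbers (made up): with workload[i] = 2000 the access threshold
// is 70% -> 1400. If the top GC_CACHE_ADAPT_CROWD_THRESHOLD (10) pages
// account for t_workload = 1500 > 1400, the loop above hashes the crowded
// pages with the most remote accesses one by one, re-deriving the threshold
// as workload[i] shrinks, until the remaining top pages fall back under it.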
3295 void cacheAdapt_master() {
3296 // check the statistic data
3297 // for each page, decide the new cache strategy
3298 //int numchanged = cacheAdapt_policy_h4h();
3299 //int numchanged = cacheAdapt_policy_local();
3300 //int numchanged = cacheAdapt_policy_hotest();
3301 //int numchanged = cacheAdapt_policy_dominate();
3302 int numchanged = cacheAdapt_policy_overload();
3303 //int numchanged = cacheAdapt_policy_crowd();
3304 *gccachepolicytbl = numchanged;
3306 //if(numchanged > 0) tprintf("=================\n");
3309 // adapt the cache strategy for the mutator
3310 void cacheAdapt_mutator() {
3311 int numchanged = *gccachepolicytbl;
3312 // check the changes and adapt them
3313 int * tmp_p = gccachepolicytbl+1;
3314 while(numchanged--) {
3315 // read out the policy
3316 int page_index = *tmp_p;
3317 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3319 /*if(BAMBOO_NUM_OF_CORE == 0) {
3320 tprintf("va: %x, policy: %d (%d,%d) \n",
3321 (int)(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva), policy.cache_mode,
3322 policy.lotar_x, policy.lotar_y);
3323 }*/
3325 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3326 policy, BAMBOO_PAGE_SIZE);
3330 //if(BAMBOO_NUM_OF_CORE == 0) tprintf("=================\n"); // TODO
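// A matching decoder for the record stream described before
// cacheAdapt_policy_h4h(), mirroring the loop above (illustrative only):
#if 0
static void apply_policies(const int * tbl,
                           void (*apply)(int page_index, int word)) {
  int numchanged = tbl[0];
  const int * tmp_p = tbl + 1;
  while(numchanged--) {
    int page_index = *tmp_p++; // which page changes
    int word = *tmp_p++;       // its new policy word
    apply(page_index, word);
  }
}
#endif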
3332 #endif // GC_CACHE_ADAPT
3334 inline void gc_collect(struct garbagelist * stackptr) {
3335 // inform the master that this core is at a gc safe point and is ready
3336 // to do gc
3337 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3338 self_numreceiveobjs, false);
3340 // core collector routine
3342 if(INITPHASE == gcphase) {
3346 #ifdef RAWPATH // TODO GC_DEBUG
3347 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3350 #ifdef GC_CACHE_ADAPT
3351 // prepare for cache adaption:
3352 cacheAdapt_gc(true);
3353 #endif // GC_CACHE_ADAPT
3354 //send init finish msg to core coordinator
3355 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3358 if(MARKPHASE == gcphase) {
3362 #ifdef RAWPATH // TODO GC_DEBUG
3363 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3364 udn_tile_coord_y());
3366 mark(true, stackptr);
3367 #ifdef RAWPATH // TODO GC_DEBUG
3368 printf("(%x,%x) Finish mark phase, start compact phase\n",
3369 udn_tile_coord_x(), udn_tile_coord_y());
3372 #ifdef RAWPATH // TODO GC_DEBUG
3373 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3374 udn_tile_coord_y());
3378 if(MAPPHASE == gcphase) {
3382 #ifdef RAWPATH // TODO GC_DEBUG
3383 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3384 udn_tile_coord_y());
3387 #ifdef RAWPATH // TODO GC_DEBUG
3388 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3389 udn_tile_coord_y());
3393 if(FLUSHPHASE == gcphase) {
3397 #ifdef RAWPATH // TODO GC_DEBUG
3398 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3399 udn_tile_coord_y());
3402 // send the num of obj/liveobj/forwardobj to the startupcore
3403 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3404 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3405 gc_num_liveobj, gc_num_forwardobj, false);
3408 #endif // GC_PROFILE
3410 #ifdef RAWPATH // TODO GC_DEBUG
3411 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3412 udn_tile_coord_y());
3415 #ifdef GC_CACHE_ADAPT
3417 if(PREFINISHPHASE == gcphase) {
3421 #ifdef RAWPATH // TODO GC_DEBUG
3422 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3423 udn_tile_coord_y());
3425 // cache adapt phase
3426 cacheAdapt_mutator();
3427 cacheAdapt_gc(false);
3428 //send init finish msg to core coordinator
3429 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3430 #ifdef RAWPATH // TODO GC_DEBUG
3431 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3432 udn_tile_coord_y());
3434 #endif // GC_CACHE_ADAPT
3437 if(FINISHPHASE == gcphase) {
3441 #ifdef RAWPATH // TODO GC_DEBUG
3442 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3444 } // void gc_collect(struct garbagelist * stackptr)
3446 inline void gc_nocollect(struct garbagelist * stackptr) {
3447 // inform the master that this core is at a gc safe point and is ready
3448 // to do gc
3449 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3450 self_numreceiveobjs, false);
3453 if(INITPHASE == gcphase) {
3457 #ifdef RAWPATH // TODO GC_DEBUG
3458 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3461 #ifdef GC_CACHE_ADAPT
3462 // prepare for cache adaption:
3463 cacheAdapt_gc(true);
3464 #endif // GC_CACHE_ADAPT
3465 //send init finish msg to core coordinator
3466 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3469 if(MARKPHASE == gcphase) {
3473 #ifdef RAWPATH // TODO GC_DEBUG
3474 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3475 udn_tile_coord_y());
3477 mark(true, stackptr);
3478 #ifdef RAWPATH // TODO GC_DEBUG
3479 printf("(%x,%x) Finish mark phase, wait for flush\n",
3480 udn_tile_coord_x(), udn_tile_coord_y());
3483 // non-gc core collector routine
3485 if(FLUSHPHASE == gcphase) {
3489 #ifdef RAWPATH // TODO GC_DEBUG
3490 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3491 udn_tile_coord_y());
3494 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3495 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3496 gc_num_liveobj, gc_num_forwardobj, false);
3499 #endif // GC_PROFILE
3501 #ifdef RAWPATH // TODO GC_DEBUG
3502 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3503 udn_tile_coord_y());
3506 #ifdef GC_CACHE_ADAPT
3508 if(PREFINISHPHASE == gcphase) {
3512 #ifdef RAWPATH // TODO GC_DEBUG
3513 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3514 udn_tile_coord_y());
3516 // cache adapt phase
3517 cacheAdapt_mutator();
3518 cacheAdapt_gc(false);
3519 //send init finish msg to core coordinator
3520 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3521 #ifdef RAWPATH // TODO GC_DEBUG
3522 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3523 udn_tile_coord_y());
3525 #endif // GC_CACHE_ADAPT
3528 if(FINISHPHASE == gcphase) {
3532 #ifdef RAWPATH // TODO GC_DEBUG
3533 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3535 } // void gc_nocollect(struct garbagelist * stackptr)
3537 inline void gc_master(struct garbagelist * stackptr) {
3539 gcphase = INITPHASE;
3541 waitconfirm = false;
3545 // Note: all cores need to init gc including non-gc cores
3546 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3547 // send GC init messages to all cores
3548 send_msg_1(i, GCSTARTINIT, false);
3550 bool isfirst = true;
3551 bool allStall = false;
3553 #ifdef GC_CACHE_ADAPT
3554 // prepare for cache adaption:
3555 cacheAdapt_gc(true);
3556 #endif // GC_CACHE_ADAPT
3558 #ifdef RAWPATH // TODO GC_DEBUG
3559 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3560 udn_tile_coord_y());
3563 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3565 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3566 if(gc_checkAllCoreStatus_I()) {
3567 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3570 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3575 #ifdef RAWPATH // TODO GC_DEBUG
3576 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3577 udn_tile_coord_y());
3579 // all cores have finished the init phase
3580 // restore the gcstatus of all cores
3581 // Note: all cores have to do mark including non-gc cores
3582 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3583 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3584 gccorestatus[i] = 1;
3585 // send GC start messages to all cores
3586 send_msg_1(i, GCSTART, false);
3589 gcphase = MARKPHASE;
3591 while(MARKPHASE == gcphase) {
3592 mark(isfirst, stackptr);
3599 } // while(MARKPHASE == gcphase)
3600 // send msgs to all cores requesting large objs info
3601 // Note: only need to ask gc cores, non-gc cores do not host any objs
3602 numconfirm = NUMCORES4GC - 1;
3603 for(i = 1; i < NUMCORES4GC; ++i) {
3604 send_msg_1(i, GCLOBJREQUEST, false);
3606 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3611 } // wait for responses
3612 // check the heaptop
3613 if(gcheaptop < gcmarkedptrbound) {
3614 gcheaptop = gcmarkedptrbound;
3619 #ifdef RAWPATH // TODO GC_DEBUG
3620 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3621 udn_tile_coord_y());
3624 // cache all large objs
3626 // not enough space to cache large objs
3627 BAMBOO_EXIT(0xb107);
3629 // predict number of blocks to fill for each core
3631 int numpbc = loadbalance(&tmpheaptop);
3633 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3634 #ifdef RAWPATH // TODO GC_DEBUG
3635 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3636 udn_tile_coord_y());
3639 //int tmptopptr = 0;
3640 //BASEPTR(gctopcore, 0, &tmptopptr);
3642 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3643 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3645 BAMBOO_DEBUGPRINT(0xabab);
3646 BAMBOO_DEBUGPRINT_REG(tmptopptr);
3648 for(i = 0; i < NUMCORES4GC; ++i) {
3650 BASEPTR(i, numpbc, &tmpcoreptr);
3651 //send start compact messages to all cores
3652 //TODO bug here: do not know if the direction is positive or negative?
3653 if (tmpcoreptr < tmpheaptop /*tmptopptr*/) {
3654 gcstopblock[i] = numpbc + 1;
3655 if(i != STARTUPCORE) {
3656 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3658 gcblock2fill = numpbc+1;
3659 } // if(i != STARTUPCORE)
3661 gcstopblock[i] = numpbc;
3662 if(i != STARTUPCORE) {
3663 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3665 gcblock2fill = numpbc;
3666 } // if(i != STARTUPCORE)
3669 BAMBOO_DEBUGPRINT(0xf000+i);
3670 BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3671 BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3673 // init some data structures for the compact phase
3675 gcfilledblocks[i] = 0;
3676 gcrequiredmems[i] = 0;
3686 bool finalcompact = false;
3687 // initialize pointers for compacting
3688 struct moveHelper * orig =
3689 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3690 struct moveHelper * to =
3691 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3692 initOrig_Dst(orig, to);
3693 int filledblocks = 0;
3694 INTPTR heaptopptr = 0;
3695 bool finishcompact = false;
3696 bool iscontinue = true;
3697 bool localcompact = true;
3698 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3699 if((!finishcompact) && iscontinue) {
3701 BAMBOO_DEBUGPRINT(0xe001);
3702 BAMBOO_DEBUGPRINT_REG(numpbc);
3703 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3705 finishcompact = compacthelper(orig, to, &filledblocks,
3706 &heaptopptr, &localcompact);
3708 BAMBOO_DEBUGPRINT(0xe002);
3709 BAMBOO_DEBUGPRINT_REG(finishcompact);
3710 BAMBOO_DEBUGPRINT_REG(gctomove);
3711 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3712 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3713 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3717 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3718 if(gc_checkCoreStatus_I()) {
3719 // all cores have finished compacting
3720 // restore the gcstatus of all cores
3721 for(i = 0; i < NUMCORES4GC; ++i) {
3722 gccorestatus[i] = 1;
3724 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3727 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3728 // check if there is spare mem for pending move requests
3729 if(COMPACTPHASE == gcphase) {
3731 BAMBOO_DEBUGPRINT(0xe003);
3733 resolvePendingMoveRequest();
3735 BAMBOO_DEBUGPRINT_REG(gctomove);
3739 BAMBOO_DEBUGPRINT(0xe004);
3743 } // if(gc_checkCoreStatus_I()) else ...
3747 BAMBOO_DEBUGPRINT(0xe005);
3748 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3749 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3750 BAMBOO_DEBUGPRINT_REG(gctomove);
3752 to->ptr = gcmovestartaddr;
3753 to->numblocks = gcblock2fill - 1;
3754 to->bound = (to->numblocks==0) ?
3755 BAMBOO_SMEM_SIZE_L :
3756 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3757 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3758 to->offset = to->ptr - to->base;
3759 to->top = (to->numblocks==0) ?
3760 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3762 to->offset = BAMBOO_CACHE_LINE_SIZE;
3763 to->ptr += to->offset; // for header
3764 to->top += to->offset;
3765 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3766 localcompact = true;
3768 localcompact = false;
3772 } else if(!finishcompact) {
3776 } // while(COMPACTPHASE == gcphase)
3780 #ifdef RAWPATH // TODO GC_DEBUG
3781 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3782 udn_tile_coord_y());
3787 #ifdef RAWPATH // TODO GC_DEBUG
3788 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3789 udn_tile_coord_y());
3797 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3798 // Note: all gc cores need to transfer their mapping info
3800 for(i = 1; i < NUMCORES4GC; ++i) {
3801 // send start map messages to all gc cores
3802 gccorestatus[i] = 1;
3803 send_msg_1(i, GCSTARTMAPINFO, false);
3808 #ifdef RAWPATH // TODO GC_DEBUG
3809 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3810 udn_tile_coord_y());
3814 #ifdef RAWPATH // TODO GC_DEBUG
3815 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3816 udn_tile_coord_y());
3818 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3819 while(MAPPHASE == gcphase) {
3820 // check the status of all cores
3821 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3822 if(gc_checkCoreStatus_I()) {
3823 // all cores have finished sending mapping info
3824 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3827 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3828 } // while(MAPPHASE == gcphase)
3830 gcphase = FLUSHPHASE;
3831 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3832 // Note: all cores should flush their runtime data, including non-gc
3833 // cores
3834 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3835 // send start flush messages to all cores
3836 gccorestatus[i] = 1;
3837 send_msg_1(i, GCSTARTFLUSH, false);
3842 #ifdef RAWPATH // TODO GC_DEBUG
3843 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3844 udn_tile_coord_y());
3848 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3849 while(FLUSHPHASE == gcphase) {
3850 // check the status of all cores
3851 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3852 if(gc_checkAllCoreStatus_I()) {
3853 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3856 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3857 } // while(FLUSHPHASE == gcphase)
3858 #ifdef RAWPATH // TODO GC_DEBUG
3859 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3860 udn_tile_coord_y());
3863 #ifdef GC_CACHE_ADAPT
3864 // now the master core needs to decide the new cache strategy
3865 cacheAdapt_master();
3867 gcphase = PREFINISHPHASE;
3868 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3869 // Note: all cores take part in the prefinish phase, including non-gc
3870 // cores
3871 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3872 // send start prefinish messages to all cores
3873 gccorestatus[i] = 1;
3874 send_msg_1(i, GCSTARTPREF, false);
3879 #ifdef RAWPATH // TODO GC_DEBUG
3880 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3881 udn_tile_coord_y());
3883 // cache adapt phase
3884 cacheAdapt_mutator();
3885 cacheAdapt_gc(false);
3886 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3887 while(PREFINISHPHASE == gcphase) {
3888 // check the status of all cores
3889 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3890 if(gc_checkAllCoreStatus_I()) {
3891 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3894 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3895 } // while(PREFINISHPHASE == gcphase)
3896 #endif // GC_CACHE_ADAPT
3898 gcphase = FINISHPHASE;
3900 // invalidate all shared mem pointers
3901 // put it here as it takes time to inform all the other cores to
3902 // finish gc, and it might cause problems when some core resumes the
3903 // mutator earlier than the other cores
3904 bamboo_cur_msp = NULL;
3905 bamboo_smem_size = 0;
3906 bamboo_smem_zero_top = NULL;
3908 gcprocessing = false;
3913 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3914 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3915 // send gc finish messages to all cores
3916 send_msg_1(i, GCFINISH, false);
3917 gccorestatus[i] = 1;
3919 #ifdef RAWPATH // TODO GC_DEBUG
3920 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
3921 udn_tile_coord_y());
3924 } // void gc_master(struct garbagelist * stackptr)
3926 inline bool gc(struct garbagelist * stackptr) {
3929 gcprocessing = false;
3933 // core coordinator routine
3934 if(0 == BAMBOO_NUM_OF_CORE) {
3936 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
3937 udn_tile_coord_y());
3939 bool isallstall = true;
3940 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3941 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3943 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
3944 if(gccorestatus[ti] != 0) {
3950 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3951 // some cores are still executing the mutator and have not reached a
3952 // gc safe point, so gc is not ready to start;
3953 // in case some pregc information msgs were lost, send a confirm
3954 // msg to the 'busy' core
3955 send_msg_1(ti, GCSTARTPRE, false);
3963 //BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3964 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
3965 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
3968 BAMBOO_DEBUGPRINT(0xec04);
3969 int sumsendobj = 0;
3970 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3971 sumsendobj += gcnumsendobjs[0][i];
3973 BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
3975 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3977 BAMBOO_DEBUGPRINT(0xec05);
3978 BAMBOO_DEBUGPRINT_REG(sumsendobj);
3980 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3981 sumsendobj -= gcnumreceiveobjs[0][i];
3983 BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
3985 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3987 BAMBOO_DEBUGPRINT(0xec06);
3988 BAMBOO_DEBUGPRINT_REG(sumsendobj);
3990 if(0 != sumsendobj) {
3991 // there were still some msgs on the fly, wait until there
3992 // are some update pregc information coming and check it again
3994 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4002 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4005 #ifdef RAWPATH // TODO GC_DEBUG
4006 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
4009 // Zero out the remaining bamboo_cur_msp
4010 // (only the first 4 bytes of the remaining memory).
4011 // The operation is done here because, for the GC_CACHE_ADAPT version,
4012 // we need to make sure the shared heap is not touched during the
4013 // gcinit phase; otherwise there would be problems when adapting the
4014 // cache strategy
4015 if((bamboo_cur_msp != 0)
4016 && (bamboo_smem_zero_top == bamboo_cur_msp)
4017 && (bamboo_smem_size > 0)) {
4018 *((int *)bamboo_cur_msp) = 0;
4020 #ifdef GC_FLUSH_DTLB
4021 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4022 BAMBOO_CLEAN_DTLB();
4023 gc_num_flush_dtlb++;
4026 #ifdef GC_CACHE_ADAPT
4027 // disable the timer interrupt
4028 bamboo_mask_timer_intr();
4029 // get the sampling data
4030 bamboo_output_dtlb_sampling();
4031 #endif // GC_CACHE_ADAPT
4032 gcprocessing = true;
4033 gc_master(stackptr);
4034 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
4035 // Zero out the remaining bamboo_cur_msp
4036 // (only the first 4 bytes of the remaining memory).
4037 // The operation is done here because, for the GC_CACHE_ADAPT version,
4038 // we need to make sure the shared heap is not touched during the
4039 // gcinit phase; otherwise there would be problems when adapting the
4040 // cache strategy
4041 if((bamboo_cur_msp != 0)
4042 && (bamboo_smem_zero_top == bamboo_cur_msp)
4043 && (bamboo_smem_size > 0)) {
4044 *((int *)bamboo_cur_msp) = 0;
4046 #ifdef GC_FLUSH_DTLB
4047 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4048 BAMBOO_CLEAN_DTLB();
4049 gc_num_flush_dtlb++;
4052 #ifdef GC_CACHE_ADAPT
4053 // disable the timer interrupt
4054 bamboo_mask_timer_intr();
4055 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4056 // get the sampling data
4057 bamboo_output_dtlb_sampling();
4059 #endif // GC_CACHE_ADAPT
4060 gcprocessing = true;
4061 gc_collect(stackptr);
4063 // invalidate all shared mem pointers
4064 bamboo_cur_msp = NULL;
4065 bamboo_smem_size = 0;
4066 bamboo_smem_zero_top = NULL;
4068 gcprocessing = false;
4070 // Zero out the remaining bamboo_cur_msp
4071 // (only the first 4 bytes of the remaining memory).
4072 // The operation is done here because, for the GC_CACHE_ADAPT version,
4073 // we need to make sure the shared heap is not touched during the
4074 // gcinit phase; otherwise there would be problems when adapting the
4075 // cache strategy
4076 if((bamboo_cur_msp != 0)
4077 && (bamboo_smem_zero_top == bamboo_cur_msp)
4078 && (bamboo_smem_size > 0)) {
4079 *((int *)bamboo_cur_msp) = 0;
4081 #ifdef GC_FLUSH_DTLB
4082 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4083 BAMBOO_CLEAN_DTLB();
4084 gc_num_flush_dtlb++;
4087 #ifdef GC_CACHE_ADAPT
4088 // disable the timer interrupt
4089 bamboo_mask_timer_intr();
4090 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4091 // get the sampling data
4092 bamboo_output_dtlb_sampling();
4094 #endif // GC_CACHE_ADAPT
4095 // not a gc core, should wait for gcfinish msg
4096 gcprocessing = true;
4097 gc_nocollect(stackptr);
4099 // invalidate all shared mem pointers
4100 bamboo_cur_msp = NULL;
4101 bamboo_smem_size = 0;
4102 bamboo_smem_zero_top = NULL;
4104 gcprocessing = false;
4106 #ifdef GC_CACHE_ADAPT
4107 // reset the sampling arrays
4108 bamboo_dtlb_sampling_reset();
4109 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4110 // zero out the gccachesamplingtbl
4111 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4112 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4113 size_cachesamplingtbl_local_r);
4114 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
4115 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4118 // enable the timer interrupt
4119 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4120 bamboo_unmask_timer_intr();
4121 #endif // GC_CACHE_ADAPT
4123 } // void gc(struct garbagelist * stackptr)
4126 inline void gc_profileStart(void) {
4127 if(!gc_infoOverflow) {
4128 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4129 gc_infoArray[gc_infoIndex] = gcInfo;
4131 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
4135 inline void gc_profileItem(void) {
4136 if(!gc_infoOverflow) {
4137 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4138 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4142 inline void gc_profileEnd(void) {
4143 if(!gc_infoOverflow) {
4144 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4145 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4146 gcInfo->time[gcInfo->index++] = gc_num_livespace;
4147 gcInfo->time[gcInfo->index++] = gc_num_freespace;
4148 gcInfo->time[gcInfo->index++] = gc_num_lobj;
4149 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4150 gcInfo->time[gcInfo->index++] = gc_num_obj;
4151 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4152 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4154 if(gc_infoIndex == GCINFOLENGTH) {
4155 gc_infoOverflow = true;
4156 //taskInfoIndex = 0;
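// Each GCInfo.time[] record ends with seven counters appended after the
// final timestamp, in this order: livespace, freespace, lobj count, lobj
// space, obj count, live obj count, forward obj count;
// gc_outputProfileData() below reads them back as time[index-7..index-1]
// and the last timestamp as time[index-8].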
4161 // output the profiling data
4162 void gc_outputProfileData() {
4165 unsigned long long totalgc = 0;
4167 //printf("Start Time, End Time, Duration\n");
4168 // output gc related info
4169 for(i = 0; i < gc_infoIndex; i++) {
4170 GCInfo * gcInfo = gc_infoArray[i];
4171 unsigned long long tmp = 0;
4172 for(j = 0; j < gcInfo->index; j++) {
4173 printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
4174 tmp = gcInfo->time[j];
4176 tmp = (tmp-gcInfo->time[0]);
4177 printf(" ++ %lld \n", tmp);
4181 if(gc_infoOverflow) {
4182 printf("Caution: gc info overflow!\n");
4185 printf("\n\n total gc time: %lld \n", totalgc);
4189 unsigned long long totalgc = 0;
4191 #ifndef BAMBOO_MEMPROF
4192 BAMBOO_DEBUGPRINT(0xdddd);
4194 // output gc related info
4195 for(i= 0; i < gc_infoIndex; i++) {
4196 GCInfo * gcInfo = gc_infoArray[i];
4197 #ifdef BAMBOO_MEMPROF
4198 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4200 unsigned long long tmp = 0;
4201 BAMBOO_DEBUGPRINT(0xddda);
4202 for(j = 0; j < gcInfo->index - 7; j++) {
4203 BAMBOO_DEBUGPRINT(gcInfo->time[j]);
4204 BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
4205 BAMBOO_DEBUGPRINT(0xdddb);
4206 tmp = gcInfo->time[j];
4208 tmp = (tmp-gcInfo->time[0]);
4209 BAMBOO_DEBUGPRINT_REG(tmp);
4210 BAMBOO_DEBUGPRINT(0xdddc);
4211 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]);
4212 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]);
4213 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]);
4214 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]);
4215 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]);
4216 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]);
4217 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]);
4218 BAMBOO_DEBUGPRINT(0xddde);
4222 #ifndef BAMBOO_MEMPROF
4223 BAMBOO_DEBUGPRINT(0xdddf);
4225 BAMBOO_DEBUGPRINT_REG(totalgc);
4227 if(gc_infoOverflow) {
4228 BAMBOO_DEBUGPRINT(0xefee);
4231 #ifndef BAMBOO_MEMPROF
4232 BAMBOO_DEBUGPRINT(0xeeee);
4236 #endif // #ifdef GC_PROFILE