3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9 #include "GCSharedHash.h"
12 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
13 extern int numqueues[][NUMCLASSES];
15 extern struct genhashtable * activetasks;
17 extern struct taskparamdescriptor *currtpd;
19 extern struct LockValue runtime_locks[MAXTASKPARAMS];
20 extern int runtime_locklen;
23 extern unsigned int gcmem_mixed_threshold;
24 extern unsigned int gcmem_mixed_usedmem;
29 struct pointerblock *next;
32 struct pointerblock *gchead=NULL;
34 struct pointerblock *gctail=NULL;
36 struct pointerblock *gctail2=NULL;
38 struct pointerblock *gcspare=NULL;
40 #define NUMLOBJPTRS 20
42 struct lobjpointerblock {
43 void * lobjs[NUMLOBJPTRS];
44 //void * dsts[NUMLOBJPTRS];
45 int lengths[NUMLOBJPTRS];
46 //void * origs[NUMLOBJPTRS];
47 int hosts[NUMLOBJPTRS];
48 struct lobjpointerblock *next;
49 struct lobjpointerblock *prev;
52 struct lobjpointerblock *gclobjhead=NULL;
53 int gclobjheadindex=0;
54 struct lobjpointerblock *gclobjtail=NULL;
55 int gclobjtailindex=0;
56 struct lobjpointerblock *gclobjtail2=NULL;
57 int gclobjtailindex2=0;
58 struct lobjpointerblock *gclobjspare=NULL;
61 typedef struct gc_cache_revise_info {
62 int orig_page_start_va;
68 int revised_sampling[NUMCORESACTIVE];
69 } gc_cache_revise_info_t;
70 gc_cache_revise_info_t gc_cache_revise_infomation;
71 #endif // GC_CACHE_ADAPT
74 // dump the whole shared mem in blocks
75 inline void dumpSMem() {
83 printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
85 // reserved blocks for sblocktbl
86 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
88 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
89 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
90 udn_tile_coord_x(), udn_tile_coord_y(),
91 *((int *)(i)), *((int *)(i + 4)),
92 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
93 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
94 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
95 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
96 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
97 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
98 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
100 sblock = gcreservedsb;
101 bool advanceblock = false;
103 for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
104 advanceblock = false;
105 // compute the sblock #, block #, and core coordinate (x,y) as well
106 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
108 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
109 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
121 coren = gc_block2core[block%(NUMCORES4GC*2)];
123 // compute core coordinate
124 BAMBOO_COORDS(coren, &x, &y);
125 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
126 udn_tile_coord_x(), udn_tile_coord_y(),
127 block, sblock++, x, y,
128 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
131 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
132 udn_tile_coord_x(), udn_tile_coord_y(),
133 *((int *)(i)), *((int *)(i + 4)),
134 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
135 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
136 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
137 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
138 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
139 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
140 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
142 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
146 // should be invoked with interrupts disabled
147 inline void gc_enqueue_I(void *ptr) {
149 BAMBOO_DEBUGPRINT(0xe601);
150 BAMBOO_DEBUGPRINT_REG(ptr);
152 if (gcheadindex==NUMPTRS) {
153 struct pointerblock * tmp;
158 tmp=RUNMALLOC_I(sizeof(struct pointerblock));
159 } // if (gcspare!=NULL)
163 } // if (gcheadindex==NUMPTRS)
164 gchead->ptrs[gcheadindex++]=ptr;
166 BAMBOO_DEBUGPRINT(0xe602);
168 } // void gc_enqueue_I(void *ptr)
170 // dequeue and destroy the queue
171 inline void * gc_dequeue_I() {
172 if (gctailindex==NUMPTRS) {
173 struct pointerblock *tmp=gctail;
180 } // if (gcspare!=NULL)
181 } // if (gctailindex==NUMPTRS)
182 return gctail->ptrs[gctailindex++];
183 } // void * gc_dequeue_I()
185 // dequeue and do not destroy the queue
186 inline void * gc_dequeue2_I() {
187 if (gctailindex2==NUMPTRS) {
188 struct pointerblock *tmp=gctail2;
189 gctail2=gctail2->next;
191 } // if (gctailindex2==NUMPTRS)
192 return gctail2->ptrs[gctailindex2++];
193 } // void * gc_dequeue2_I()
195 inline int gc_moreItems_I() {
196 if ((gchead==gctail)&&(gctailindex==gcheadindex))
199 } // int gc_moreItems_I()
201 inline int gc_moreItems2_I() {
202 if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
205 } // int gc_moreItems2_I()
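// A minimal usage sketch (illustrative only, not compiled): the worklist
// above is a linked list of fixed-size pointerblocks, with gchead/gcheadindex
// at the enqueue end, gctail/gctailindex as a destructive dequeue cursor, and
// gctail2/gctailindex2 as a second, non-destructive scan cursor; one
// exhausted block is recycled through gcspare so the common case avoids
// RUNMALLOC_I.
#if 0
static void example_scan_then_drain(void) {
  // non-destructive pass: visit every queued pointer, keep the blocks
  while (gc_moreItems2_I()) {
    void * p = gc_dequeue2_I();
    // ... inspect p ...
  }
  // destructive pass: same items; consumed blocks are cached in gcspare
  while (gc_moreItems_I()) {
    void * p = gc_dequeue_I();
    // ... process p ...
  }
}
#endif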
207 // should be invoked with interrupts disabled
208 // enqueue a large obj: start addr & length
209 inline void gc_lobjenqueue_I(void *ptr,
213 BAMBOO_DEBUGPRINT(0xe901);
215 if (gclobjheadindex==NUMLOBJPTRS) {
216 struct lobjpointerblock * tmp;
217 if (gclobjspare!=NULL) {
221 tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
222 } // if (gclobjspare!=NULL)
223 gclobjhead->next=tmp;
224 tmp->prev = gclobjhead;
227 } // if (gclobjheadindex==NUMLOBJPTRS)
228 gclobjhead->lobjs[gclobjheadindex]=ptr;
229 gclobjhead->lengths[gclobjheadindex]=length;
230 gclobjhead->hosts[gclobjheadindex++]=host;
232 BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
233 BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
234 BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
236 } // void gc_lobjenqueue_I(void *ptr...)
238 // dequeue and destroy the queue
239 inline void * gc_lobjdequeue_I(int * length,
241 if (gclobjtailindex==NUMLOBJPTRS) {
242 struct lobjpointerblock *tmp=gclobjtail;
243 gclobjtail=gclobjtail->next;
245 gclobjtail->prev = NULL;
246 if (gclobjspare!=NULL) {
252 } // if (gclobjspare!=NULL)
253 } // if (gclobjtailindex==NUMLOBJPTRS)
255 *length = gclobjtail->lengths[gclobjtailindex];
258 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
260 return gclobjtail->lobjs[gclobjtailindex++];
261 } // void * gc_lobjdequeue_I()
263 inline int gc_lobjmoreItems_I() {
264 if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
267 } // int gc_lobjmoreItems_I()
269 // dequeue and don't destroy the queue
270 inline void gc_lobjdequeue2_I() {
271 if (gclobjtailindex2==NUMLOBJPTRS) {
272 gclobjtail2=gclobjtail2->next;
276 } // if (gclobjtailindex2==NUMLOBJPTRS)
277 } // void gc_lobjdequeue2_I()
279 inline int gc_lobjmoreItems2_I() {
280 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
283 } // int gc_lobjmoreItems2_I()
285 // dequeue in reverse order and don't destroy the queue
286 inline void gc_lobjdequeue3_I() {
287 if (gclobjtailindex2==0) {
288 gclobjtail2=gclobjtail2->prev;
289 gclobjtailindex2=NUMLOBJPTRS-1;
292 } // if (gclobjtailindex2==NUMLOBJPTRS)
293 } // void * gc_lobjdequeue3()
295 inline int gc_lobjmoreItems3_I() {
296 if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
299 } // int gc_lobjmoreItems3_I()
301 inline void gc_lobjqueueinit4_I() {
302 gclobjtail2 = gclobjtail;
303 gclobjtailindex2 = gclobjtailindex;
304 } // void gc_lobjqueueinit4_I()
306 inline void * gc_lobjdequeue4_I(int * length,
308 if (gclobjtailindex2==NUMLOBJPTRS) {
309 gclobjtail2=gclobjtail2->next;
311 } // if (gclobjtailindex==NUMLOBJPTRS)
313 *length = gclobjtail2->lengths[gclobjtailindex2];
316 *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
318 return gclobjtail2->lobjs[gclobjtailindex2++];
319 } // void * gc_lobjdequeue4_I()
321 inline int gc_lobjmoreItems4_I() {
322 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
325 } // int gc_lobjmoreItems4_I()
327 INTPTR gccurr_heapbound = 0;
329 inline void gettype_size(void * ptr,
332 int type = ((int *)ptr)[0];
334 if(type < NUMCLASSES) {
336 size = classsize[type];
339 struct ArrayObject *ao=(struct ArrayObject *)ptr;
340 int elementsize=classsize[type];
341 int length=ao->___length___;
342 size=sizeof(struct ArrayObject)+length*elementsize;
343 } // if(type < NUMCLASSES)
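// Sketch of the object header layout that gettype_size() relies on, as used
// throughout this file: word 0 of a shared object holds its type id and
// word 6 holds the GC mark flags (INIT/DISCOVERED/MARKED/COMPACTED); arrays
// keep their element count in ___length___. Illustrative only, not compiled.
#if 0
static int example_size_of(void * ptr) {
  int type = ((int *)ptr)[0];          // word 0: type id
  if(type < NUMCLASSES) {
    return classsize[type];            // plain object: fixed size
  } else {
    struct ArrayObject * ao = (struct ArrayObject *)ptr;
    return sizeof(struct ArrayObject)
           + ao->___length___ * classsize[type];  // header + elements
  }
}
#endif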
348 inline bool isLarge(void * ptr,
352 BAMBOO_DEBUGPRINT(0xe701);
353 BAMBOO_DEBUGPRINT_REG(ptr);
355 // check if a pointer is referring to a large object
356 gettype_size(ptr, ttype, tsize);
358 BAMBOO_DEBUGPRINT(*tsize);
360 int bound = (BAMBOO_SMEM_SIZE);
361 if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
362 bound = (BAMBOO_SMEM_SIZE_L);
364 if((((int)ptr-gcbaseva)%(bound))==0) {
365 // ptr is a start of a block
367 BAMBOO_DEBUGPRINT(0xe702);
368 BAMBOO_DEBUGPRINT(1);
372 if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
373 // it crosses the boundary of the current block
375 BAMBOO_DEBUGPRINT(0xe703);
376 BAMBOO_DEBUGPRINT(1);
381 BAMBOO_DEBUGPRINT(0);
384 } // bool isLarge(void * ptr, int * ttype, int * tsize)
386 inline int hostcore(void * ptr) {
387 // check the host core of ptr
389 RESIDECORE(ptr, &host);
391 BAMBOO_DEBUGPRINT(0xedd0);
392 BAMBOO_DEBUGPRINT_REG(ptr);
393 BAMBOO_DEBUGPRINT_REG(host);
396 } // int hostcore(void * ptr)
398 inline void cpu2coords(int coren,
401 *x = bamboo_cpu2coords[2*coren];
402 *y = bamboo_cpu2coords[2*coren+1];
403 } // void cpu2coords(...)
405 inline bool isLocal(void * ptr) {
406 // check if a pointer is in the shared heap on this core
407 return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
408 } // bool isLocal(void * ptr)
410 inline bool gc_checkCoreStatus_I() {
411 bool allStall = true;
412 for(int i = 0; i < NUMCORES4GC; ++i) {
413 if(gccorestatus[i] != 0) {
416 } // if(gccorestatus[i] != 0)
417 } // for(i = 0; i < NUMCORES4GC; ++i)
421 inline bool gc_checkAllCoreStatus_I() {
422 bool allStall = true;
423 for(int i = 0; i < NUMCORESACTIVE; ++i) {
424 if(gccorestatus[i] != 0) {
427 } // if(gccorestatus[i] != 0)
428 } // for(i = 0; i < NUMCORESACTIVE; ++i)
432 inline void checkMarkStatue() {
434 BAMBOO_DEBUGPRINT(0xee01);
438 (waitconfirm && (numconfirm == 0))) {
440 BAMBOO_DEBUGPRINT(0xee02);
445 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
448 entry_index = gcnumsrobjs_index;
450 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
451 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
452 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
453 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
454 // check the status of all cores
455 bool allStall = gc_checkAllCoreStatus_I();
457 BAMBOO_DEBUGPRINT(0xee03);
461 BAMBOO_DEBUGPRINT(0xee04);
466 BAMBOO_DEBUGPRINT(0xee05);
468 // first time all cores are found to be stalled:
469 // send out status confirm msgs to all other cores
470 // and reset the corestatus array too
471 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
473 numconfirm = NUMCORESACTIVE - 1;
474 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
475 for(i = 1; i < NUMCORESACTIVE; ++i) {
477 // send mark phase finish confirm request msg to core i
478 send_msg_1(i, GCMARKCONFIRM, false);
479 } // for(i = 1; i < NUMCORESACTIVE; ++i)
482 // check if the sums of sent objs and received objs are the same
483 // yes -> check if the info is the latest; no -> go on executing
485 for(i = 0; i < NUMCORESACTIVE; ++i) {
486 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
487 } // for(i = 0; i < NUMCORESACTIVE; ++i)
489 BAMBOO_DEBUGPRINT(0xee06);
490 BAMBOO_DEBUGPRINT_REG(sumsendobj);
492 for(i = 0; i < NUMCORESACTIVE; ++i) {
493 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
494 } // for(i = 0; i < NUMCORESACTIVE; ++i)
496 BAMBOO_DEBUGPRINT(0xee07);
497 BAMBOO_DEBUGPRINT_REG(sumsendobj);
499 if(0 == sumsendobj) {
500 // Check if there are any changes in the numsendobjs or numreceiveobjs on
502 bool ischanged = false;
503 for(i = 0; i < NUMCORESACTIVE; ++i) {
504 if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
505 (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
509 } // for(i = 0; i < NUMCORESACTIVE; ++i)
511 BAMBOO_DEBUGPRINT(0xee08);
512 BAMBOO_DEBUGPRINT_REG(ischanged);
516 BAMBOO_DEBUGPRINT(0xee09);
518 // all the core status info is the latest
520 gcphase = COMPACTPHASE;
521 // restore the gcstatus for all cores
522 for(i = 0; i < NUMCORESACTIVE; ++i) {
524 } // for(i = 0; i < NUMCORESACTIVE; ++i)
527 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
530 // There were changes between phase 1 and phase 2, so we cannot decide
531 // whether the mark phase has finished
533 // As the check failed in phase 2, flip the entries
534 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
535 } // if(0 == sumsendobj) else ...
536 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
537 } // if(!gcwaitconfirm) else()
539 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
541 } // if((!waitconfirm)...
543 BAMBOO_DEBUGPRINT(0xee0a);
545 } // void checkMarkStatue()
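// Sketch of the termination test behind checkMarkStatue() (illustrative
// only, not compiled): two snapshots of the per-core send/receive counters
// are kept, indexed by gcnumsrobjs_index, and marking is declared finished
// only when the counter sums cancel AND both snapshots agree, i.e. no mark
// message was still in flight between the two confirmation rounds.
#if 0
static bool example_mark_done(void) {
  long sum = 0;
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sum += gcnumsendobjs[gcnumsrobjs_index][i]
         - gcnumreceiveobjs[gcnumsrobjs_index][i];
  }
  if(sum != 0) return false;           // msgs still unaccounted for
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i])
       || (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i])) {
      return false;                    // counters moved between rounds
    }
  }
  return true;
}
#endif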
547 inline bool preGC() {
548 // preparation for gc:
549 // make sure to clear all incoming msgs, especially transfer obj msgs
551 BAMBOO_DEBUGPRINT(0xec01);
555 (waitconfirm && (numconfirm == 0))) {
556 // send out status confirm msgs to all cores to check if there are
557 // transfer obj msgs on-the-fly
559 numconfirm = NUMCORESACTIVE - 1;
560 for(i = 1; i < NUMCORESACTIVE; ++i) {
562 // send status confirm msg to core i
563 send_msg_1(i, STATUSCONFIRM, false);
564 } // for(i = 1; i < NUMCORESACTIVE; ++i)
567 BAMBOO_DEBUGPRINT(0xec02);
570 if(numconfirm == 0) {
573 } // wait for confirmations
577 BAMBOO_DEBUGPRINT(0xec03);
579 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
580 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
583 BAMBOO_DEBUGPRINT(0xec04);
585 for(i = 0; i < NUMCORESACTIVE; ++i) {
586 sumsendobj += numsendobjs[i];
588 BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
590 } // for(i = 1; i < NUMCORESACTIVE; ++i)
592 BAMBOO_DEBUGPRINT(0xec05);
593 BAMBOO_DEBUGPRINT_REG(sumsendobj);
595 for(i = 0; i < NUMCORESACTIVE; ++i) {
596 sumsendobj -= numreceiveobjs[i];
598 BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
600 } // for(i = 1; i < NUMCORESACTIVE; ++i)
602 BAMBOO_DEBUGPRINT(0xec06);
603 BAMBOO_DEBUGPRINT_REG(sumsendobj);
605 if(0 == sumsendobj) {
608 // there are still transfer obj msgs on the fly; cannot start gc
610 } // if(0 == sumsendobj)
613 BAMBOO_DEBUGPRINT(0xec07);
615 // previously asked for status confirmation and have not received all
616 // the confirmations yet; cannot start gc
618 } // if((!waitconfirm) ||
621 inline void initGC() {
623 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
624 for(i = 0; i < NUMCORES4GC; ++i) {
626 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
627 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
629 gcrequiredmems[i] = 0;
630 gcfilledblocks[i] = 0;
632 } // for(i = 0; i < NUMCORES4GC; ++i)
633 for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
635 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
636 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
641 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
642 gcself_numsendobjs = 0;
643 gcself_numreceiveobjs = 0;
644 gcmarkedptrbound = 0;
647 //gcismapped = false;
658 gcheadindex=gctailindex=gctailindex2 = 0;
659 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
661 gctailindex = gctailindex2 = gcheadindex;
662 gctail = gctail2 = gchead;
665 // initialize the large obj queues
666 if (gclobjhead==NULL) {
669 gclobjtailindex2 = 0;
670 gclobjhead=gclobjtail=gclobjtail2=
671 RUNMALLOC(sizeof(struct lobjpointerblock));
673 gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
674 gclobjtail = gclobjtail2 = gclobjhead;
676 gclobjhead->next = gclobjhead->prev = NULL;
678 #ifdef LOCALHASHTBL_TEST
679 freeRuntimeHash(gcpointertbl);
680 gcpointertbl = allocateRuntimeHash(20);
682 mgchashreset(gcpointertbl);
684 //gcpointertbl = allocateMGCHash(20);
686 freeMGCHash(gcforwardobjtbl);
687 gcforwardobjtbl = allocateMGCHash(20, 3);
689 // initialize the mapping info related structures
690 if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
691 // Never free the shared hash table, just reset it
692 /*freeGCSharedHash(gcsharedptbl);
693 gcsharedptbl = allocateGCSharedHash(20);*/
694 mgcsharedhashReset(gcsharedptbl);
696 // Zero out the remaining bamboo_cur_msp
697 // Only zero out the first 4 bytes of the remaining memory
698 /*if((bamboo_cur_msp != 0)
699 && (bamboo_smem_zero_top == bamboo_cur_msp)
700 && (bamboo_smem_size > 0)) {
701 *((int *)bamboo_cur_msp) = 0;
704 gc_num_livespace = 0;
705 gc_num_freespace = 0;
707 gc_num_lobjspace = 0;
709 gc_num_forwardobj = 0;
710 gc_num_profiles = NUMCORESACTIVE - 1;
714 // compute load balance for all cores
715 inline int loadbalance(int * heaptop) {
716 // compute load balance
719 // get the total loads
720 int tloads = gcloads[STARTUPCORE];
721 for(i = 1; i < NUMCORES4GC; i++) {
722 tloads += gcloads[i];
724 *heaptop = gcbaseva + tloads;
727 BAMBOO_DEBUGPRINT(0xdddd);
728 BAMBOO_DEBUGPRINT_REG(tloads);
729 BAMBOO_DEBUGPRINT_REG(*heaptop);
732 BLOCKINDEX(*heaptop, &b);
733 int numbpc = b / NUMCORES4GC; // num of blocks per core
735 BAMBOO_DEBUGPRINT_REG(b);
736 BAMBOO_DEBUGPRINT_REG(numbpc);
739 RESIDECORE(heaptop, &gctopcore);
741 BAMBOO_DEBUGPRINT_REG(gctopcore);
744 } // int loadbalance(int * heaptop)
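// Worked example (hypothetical numbers): with NUMCORES4GC = 4 and compacted
// per-core loads gcloads = {6MB, 2MB, 3MB, 1MB}, tloads = 12MB, so
// *heaptop = gcbaseva + 12MB; BLOCKINDEX maps that address to a block
// number b, numbpc = b / NUMCORES4GC is the per-core block budget, and
// RESIDECORE names the core that owns the block containing the heap top.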
746 inline bool cacheLObjs() {
747 // compute the total mem size needed for large objs
748 unsigned long long sumsize = 0;
751 BAMBOO_DEBUGPRINT(0xe801);
753 gclobjtail2 = gclobjtail;
754 gclobjtailindex2 = gclobjtailindex;
758 // compute total mem size required and sort the lobjs in ascending order
759 while(gc_lobjmoreItems2_I()) {
761 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
762 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
763 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
769 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
770 BAMBOO_DEBUGPRINT_REG(tmp_len);
771 BAMBOO_DEBUGPRINT_REG(sumsize);
773 int i = gclobjtailindex2-1;
774 struct lobjpointerblock * tmp_block = gclobjtail2;
775 // find the place to insert
778 if(tmp_block->prev == NULL) {
781 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
782 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
783 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
784 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
785 tmp_block = tmp_block->prev;
789 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
791 if(tmp_block->lobjs[i-1] > tmp_lobj) {
792 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
793 tmp_block->lengths[i] = tmp_block->lengths[i-1];
794 tmp_block->hosts[i] = tmp_block->hosts[i-1];
798 } // if(tmp_block->lobjs[i-1] > tmp_lobj)
799 } // if(i == 0) else ...
802 if(i != gclobjtailindex2 - 1) {
803 tmp_block->lobjs[i] = tmp_lobj;
804 tmp_block->lengths[i] = tmp_len;
805 tmp_block->hosts[i] = tmp_host;
807 } // while(gc_lobjmoreItems2())
810 gc_num_lobjspace = sumsize;
812 // check if there is enough space to cache these large objs
813 INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
814 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
815 // do not have enough room to cache large objs
817 BAMBOO_DEBUGPRINT(0xe802);
818 BAMBOO_DEBUGPRINT_REG(dst);
819 BAMBOO_DEBUGPRINT_REG(gcheaptop);
820 BAMBOO_DEBUGPRINT_REG(sumsize);
825 BAMBOO_DEBUGPRINT(0xe803);
826 BAMBOO_DEBUGPRINT_REG(dst);
827 BAMBOO_DEBUGPRINT_REG(gcheaptop);
830 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
831 // cache the largeObjs to the top of the shared heap
832 //gclobjtail2 = gclobjtail;
833 //gclobjtailindex2 = gclobjtailindex;
834 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
835 while(gc_lobjmoreItems3_I()) {
837 size = gclobjtail2->lengths[gclobjtailindex2];
838 // set the mark field to COMPACTED, indicating that this obj has been
839 // moved and needs to be flushed
840 ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
842 if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
843 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
845 //BAMBOO_WRITE_HINT_CACHE(dst, size);
846 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
849 BAMBOO_DEBUGPRINT(0x804);
850 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
851 BAMBOO_DEBUGPRINT(dst);
852 BAMBOO_DEBUGPRINT_REG(size);
853 BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
854 BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
858 } // bool cacheLObjs()
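// Sketch of the insertion step used above, flattened to a plain array
// (illustrative only, not compiled): the large-object records are kept
// sorted by ascending start address so the later copy toward the heap top
// can walk them in a single direction.
#if 0
static void example_insert_sorted(void * lobjs[], int lengths[], int n,
                                  void * obj, int len) {
  int i = n;
  while((i > 0) && (lobjs[i-1] > obj)) {
    lobjs[i] = lobjs[i-1];     // shift larger addresses one slot up
    lengths[i] = lengths[i-1];
    i--;
  }
  lobjs[i] = obj;              // drop the new record into its slot
  lengths[i] = len;
}
#endif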
860 // update the bamboo_smemtbl to record current shared mem usage
861 void updateSmemTbl(int coren,
864 int bound = BAMBOO_SMEM_SIZE_L;
865 BLOCKINDEX(localtop, &ltopcore);
866 if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
867 bound = BAMBOO_SMEM_SIZE;
869 int load = (localtop-gcbaseva)%bound;
874 toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
875 if(toset < ltopcore) {
876 bamboo_smemtbl[toset]=
877 (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
879 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
881 } else if(toset == ltopcore) {
882 bamboo_smemtbl[toset] = load;
884 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
896 } // void updateSmemTbl(int, int)
898 inline void moveLObjs() {
900 BAMBOO_DEBUGPRINT(0xea01);
903 // update the gcmem_mixed_usedmem
904 gcmem_mixed_usedmem = 0;
906 // zero out the smemtbl
907 BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
908 // find current heap top
909 // flush all gcloads to indicate the real heap top on each core
910 // (previously it held the next available ptr on that core)
911 if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
912 && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
913 // edge of a block, check if this is exactly the heaptop
914 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
915 gcloads[0]+=(gcfilledblocks[0]>1 ?
916 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
918 updateSmemTbl(0, gcloads[0]);
920 BAMBOO_DEBUGPRINT(0xea02);
921 BAMBOO_DEBUGPRINT_REG(gcloads[0]);
922 BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
924 for(int i = 1; i < NUMCORES4GC; i++) {
927 BAMBOO_DEBUGPRINT(0xf000+i);
928 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
929 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
931 if((gcfilledblocks[i] > 0)
932 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
933 // edge of a block, check if this is exactly the heaptop
934 BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
936 (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
939 updateSmemTbl(i, gcloads[i]);
941 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
943 } // for(int i = 1; i < NUMCORES4GC; i++) {
945 // find current heap top
947 // a bug here: when using local allocation, directly moving large objects
948 // to the highest free chunk might not be memory-efficient
953 for(i = gcnumblock-1; i >= 0; i--) {
954 if(bamboo_smemtbl[i] > 0) {
959 tmpheaptop = gcbaseva;
961 tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
962 (BAMBOO_SMEM_SIZE_L*i) :
963 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
966 // move large objs from gcheaptop to tmpheaptop
967 // write the header first
968 unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -gcheaptop;
970 gcmem_mixed_usedmem += tomove;
973 BAMBOO_DEBUGPRINT(0xea03);
974 BAMBOO_DEBUGPRINT_REG(tomove);
975 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
976 BAMBOO_DEBUGPRINT_REG(gcheaptop);
978 // flush the gcsbstarttbl
979 BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
980 (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
982 gcheaptop = tmpheaptop;
984 // check how many blocks it crosses
985 int remain = tmpheaptop-gcbaseva;
986 int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; // number of the sblock
987 int b = 0; // number of the block
988 BLOCKINDEX(tmpheaptop, &b);
989 // check the remaining space in this block
990 bound = (BAMBOO_SMEM_SIZE);
991 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
992 bound = (BAMBOO_SMEM_SIZE_L);
994 remain = bound - remain%bound;
997 BAMBOO_DEBUGPRINT(0xea04);
1003 int base = tmpheaptop;
1005 remain -= BAMBOO_CACHE_LINE_SIZE;
1006 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1007 gc_lobjqueueinit4_I();
1008 while(gc_lobjmoreItems4_I()) {
1009 ptr = (int)(gc_lobjdequeue4_I(&size, &host));
1010 ALIGNSIZE(size, &isize);
1011 if(remain < isize) {
1012 // this object crosses block boundaries
1014 // close current block, fill its header
1015 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1016 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1017 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
1021 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1022 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1024 remain -= BAMBOO_CACHE_LINE_SIZE;
1025 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1026 BLOCKINDEX(tmpheaptop, &b);
1027 sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
1028 } // if(cpysize > 0)
1030 // move the large obj
1031 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1032 memmove(tmpheaptop, gcheaptop, size);
1034 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1035 memcpy(tmpheaptop, gcheaptop, size);
1037 // fill the remaining space with -2 padding
1038 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1040 BAMBOO_DEBUGPRINT(0xea05);
1041 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1042 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1043 BAMBOO_DEBUGPRINT_REG(size);
1044 BAMBOO_DEBUGPRINT_REG(isize);
1045 BAMBOO_DEBUGPRINT_REG(base);
1048 // cache the mapping info anyway
1049 //if(ptr != tmpheaptop) {
1050 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1051 #ifdef LOCALHASHTBL_TEST
1052 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1054 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1056 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1057 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1060 BAMBOO_DEBUGPRINT(0xcdca);
1061 BAMBOO_DEBUGPRINT_REG(ptr);
1062 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1064 if(host != BAMBOO_NUM_OF_CORE) {
1065 // send the mapping info to the original host core
1066 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1068 BAMBOO_DEBUGPRINT(0xcdcb);
1069 BAMBOO_DEBUGPRINT_REG(ptr);
1070 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1072 } // if(host != BAMBOO_NUM_OF_CORE)
1073 tmpheaptop += isize;
1075 // set the gcsbstarttbl and bamboo_smemtbl
1076 int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
1077 for(int k = 1; k < tmpsbs; k++) {
1078 gcsbstarttbl[sb+k] = (INTPTR)(-1);
1081 bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1082 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
1083 for(; b < tmpsbs; b++) {
1084 bamboo_smemtbl[b] = bound;
1085 if(b==NUMCORES4GC-1) {
1086 bound = BAMBOO_SMEM_SIZE;
1089 if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
1090 gcsbstarttbl[sb] = (INTPTR)(-1);
1091 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1092 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1093 bamboo_smemtbl[b] = bound;
1095 gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
1096 remain = tmpheaptop-gcbaseva;
1097 bamboo_smemtbl[b] = remain%bound;
1098 remain = bound - bamboo_smemtbl[b];
1099 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
1101 // close current block and fill the header
1102 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1103 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
1106 if(remain == BAMBOO_CACHE_LINE_SIZE) {
1107 // fill with 0 in case
1108 BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
1110 remain -= BAMBOO_CACHE_LINE_SIZE;
1111 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1114 // move the large obj
1115 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1116 memmove(tmpheaptop, gcheaptop, size);
1118 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1119 memcpy(tmpheaptop, gcheaptop, size);
1121 // fill the remaining space with -2 padding
1122 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1124 BAMBOO_DEBUGPRINT(0xea06);
1125 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1126 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1127 BAMBOO_DEBUGPRINT_REG(size);
1128 BAMBOO_DEBUGPRINT_REG(isize);
1133 // cache the mapping info anyway
1134 //if(ptr != tmpheaptop) {
1135 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1136 #ifdef LOCALHASHTBL_TEST
1137 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1139 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1141 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1142 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1145 BAMBOO_DEBUGPRINT(0xcdcc);
1146 BAMBOO_DEBUGPRINT_REG(ptr);
1147 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1148 BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
1150 if(host != BAMBOO_NUM_OF_CORE) {
1151 // send the mapping info to the original host core
1152 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1154 BAMBOO_DEBUGPRINT(0xcdcd);
1155 BAMBOO_DEBUGPRINT_REG(ptr);
1156 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1158 } // if(host != BAMBOO_NUM_OF_CORE)
1159 tmpheaptop += isize;
1161 // update bamboo_smemtbl
1162 bamboo_smemtbl[b] += isize;
1163 } // if(remain < isize) else ...
1164 } // while(gc_lobjmoreItems())
1166 // close current block, fill the header
1167 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1168 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1169 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
1171 tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
1173 gcheaptop = tmpheaptop;
1175 } // if(tomove == 0)
1178 BAMBOO_DEBUGPRINT(0xea07);
1179 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1182 bamboo_free_block = 0;
1185 tbound = (bamboo_free_block<NUMCORES4GC) ?
1186 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1187 if(bamboo_smemtbl[bamboo_free_block] == tbound) {
1188 bamboo_free_block++;
1190 // the first non-full partition
1196 // check how much live space there is
1197 gc_num_livespace = 0;
1198 for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
1199 gc_num_livespace += bamboo_smemtbl[tmpi];
1201 gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
1204 BAMBOO_DEBUGPRINT(0xea08);
1205 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1207 } // void moveLObjs()
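// Layout produced above for each (partially) filled block, as implied by
// the header writes and padding in moveLObjs():
//
//   base                     base + BAMBOO_CACHE_LINE_SIZE
//   +------------------------+--------+---------+--------+-----+------
//   | header: first int =    | object | -2 pad  | object | ... | 0...
//   | used bytes incl. hdr,  |        | (align) |        |     |
//   | rest of line zeroed    |        |         |        |     |
//   +------------------------+--------+---------+--------+-----+------
//
// A zero word where a type id is expected marks the end of the block's
// live data, which is exactly what nextSBlock()/moveobj() test for below.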
1209 inline void markObj(void * objptr) {
1210 if(objptr == NULL) {
1213 if(ISSHAREDOBJ(objptr)) {
1214 int host = hostcore(objptr);
1215 if(BAMBOO_NUM_OF_CORE == host) {
1217 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1218 if(((int *)objptr)[6] == INIT) {
1219 // this is the first time that this object is discovered,
1220 // set the flag as DISCOVERED
1221 ((int *)objptr)[6] |= DISCOVERED;
1222 BAMBOO_CACHE_FLUSH_LINE(objptr);
1223 gc_enqueue_I(objptr);
1225 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1228 BAMBOO_DEBUGPRINT(0xbbbb);
1229 BAMBOO_DEBUGPRINT_REG(host);
1230 BAMBOO_DEBUGPRINT_REG(objptr);
1232 // check if this obj has been forwarded
1233 if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
1234 // send a msg to host informing that objptr is active
1235 send_msg_2(host, GCMARKEDOBJ, objptr, /*BAMBOO_NUM_OF_CORE,*/ false);
1237 gc_num_forwardobj++;
1238 #endif // GC_PROFILE
1239 gcself_numsendobjs++;
1240 MGCHashadd(gcforwardobjtbl, (int)objptr);
1244 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1245 gc_enqueue_I(objptr);
1246 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1247 } // if(ISSHAREDOBJ(objptr))
1248 } // void markObj(void * objptr)
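// Sketch of the marking protocol implemented above (illustrative only, not
// compiled): a shared object is always marked on its host core; a local
// shared object has its DISCOVERED bit set and is enqueued, while a remote
// one triggers at most one GCMARKEDOBJ message to its host, de-duplicated
// through gcforwardobjtbl.
#if 0
static void example_mark(void * obj) {
  if(ISSHAREDOBJ(obj)) {
    if(hostcore(obj) == BAMBOO_NUM_OF_CORE) {
      // local shared obj: flip INIT -> DISCOVERED, flush the cache line,
      // then gc_enqueue_I(obj)
    } else if(!MGCHashcontains(gcforwardobjtbl, (int)obj)) {
      send_msg_2(hostcore(obj), GCMARKEDOBJ, obj, false);
      MGCHashadd(gcforwardobjtbl, (int)obj);  // forward each obj at most once
    }
  } else {
    // non-shared obj: enqueue directly so its fields still get scanned
  }
}
#endif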
1250 // enqueue root objs
1251 inline void tomark(struct garbagelist * stackptr) {
1252 if(MARKPHASE != gcphase) {
1254 BAMBOO_DEBUGPRINT_REG(gcphase);
1256 BAMBOO_EXIT(0xb101);
1258 gcbusystatus = true;
1262 // enqueue current stack
1263 while(stackptr!=NULL) {
1265 BAMBOO_DEBUGPRINT(0xe501);
1266 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1267 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1268 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1270 for(i=0; i<stackptr->size; i++) {
1271 if(stackptr->array[i] != NULL) {
1272 markObj(stackptr->array[i]);
1275 stackptr=stackptr->next;
1279 BAMBOO_DEBUGPRINT(0xe503);
1281 // enqueue objectsets
1282 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
1283 for(i=0; i<NUMCLASSES; i++) {
1284 struct parameterwrapper ** queues =
1285 objectqueues[BAMBOO_NUM_OF_CORE][i];
1286 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1287 for(j = 0; j < length; ++j) {
1288 struct parameterwrapper * parameter = queues[j];
1289 struct ObjectHash * set=parameter->objectset;
1290 struct ObjectNode * ptr=set->listhead;
1292 markObj((void *)ptr->key);
1299 // enqueue current task descriptor
1300 if(currtpd != NULL) {
1302 BAMBOO_DEBUGPRINT(0xe504);
1304 for(i=0; i<currtpd->numParameters; i++) {
1305 markObj(currtpd->parameterArray[i]);
1310 BAMBOO_DEBUGPRINT(0xe505);
1312 // enqueue active tasks
1313 if(activetasks != NULL) {
1314 struct genpointerlist * ptr=activetasks->list;
1316 struct taskparamdescriptor *tpd=ptr->src;
1318 for(i=0; i<tpd->numParameters; i++) {
1319 markObj(tpd->parameterArray[i]);
1326 BAMBOO_DEBUGPRINT(0xe506);
1328 // enqueue cached transferred obj
1329 struct QueueItem * tmpobjptr = getHead(&objqueue);
1330 while(tmpobjptr != NULL) {
1331 struct transObjInfo * objInfo =
1332 (struct transObjInfo *)(tmpobjptr->objectptr);
1333 markObj(objInfo->objptr);
1334 tmpobjptr = getNextQueueItem(tmpobjptr);
1338 BAMBOO_DEBUGPRINT(0xe507);
1340 // enqueue cached objs to be transferred
1341 struct QueueItem * item = getHead(totransobjqueue);
1342 while(item != NULL) {
1343 struct transObjInfo * totransobj =
1344 (struct transObjInfo *)(item->objectptr);
1345 markObj(totransobj->objptr);
1346 item = getNextQueueItem(item);
1347 } // while(item != NULL)
1350 BAMBOO_DEBUGPRINT(0xe508);
1352 // enqueue lock related info
1353 for(i = 0; i < runtime_locklen; ++i) {
1354 markObj((void *)(runtime_locks[i].redirectlock));
1355 if(runtime_locks[i].value != NULL) {
1356 markObj((void *)(runtime_locks[i].value));
1360 } // void tomark(struct garbagelist * stackptr)
1362 inline void mark(bool isfirst,
1363 struct garbagelist * stackptr) {
1365 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
1369 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
1371 // enqueue root objs
1373 gccurr_heaptop = 0; // record the total size of all active objs on this core,
1374 // aligned but not accounting for block boundaries
1375 gcmarkedptrbound = 0;
1378 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);
1381 bool checkfield = true;
1382 bool sendStall = false;
1384 while(MARKPHASE == gcphase) {
1386 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
1389 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1390 bool hasItems = gc_moreItems2_I();
1391 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1393 BAMBOO_DEBUGPRINT(0xed05);
1399 gcbusystatus = true;
1401 void * ptr = gc_dequeue2_I();
1404 BAMBOO_DEBUGPRINT_REG(ptr);
1409 // check if it is a shared obj
1410 if(ISSHAREDOBJ(ptr)) {
1411 // a shared obj, check if it is a local obj on this core
1412 int host = hostcore(ptr);
1413 bool islocal = (host == BAMBOO_NUM_OF_CORE);
1415 bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
1416 if(isLarge(ptr, &type, &size) && isnotmarked) {
1417 // ptr is a large object and not marked or enqueued
1419 BAMBOO_DEBUGPRINT(0xecec);
1420 BAMBOO_DEBUGPRINT_REG(ptr);
1421 BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
1423 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1424 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1426 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1428 ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
1429 BAMBOO_CACHE_FLUSH_LINE(ptr);
1430 } else if(isnotmarked) {
1431 // ptr is an unmarked active object on this core
1432 ALIGNSIZE(size, &isize);
1433 gccurr_heaptop += isize;
1435 BAMBOO_DEBUGPRINT(0xaaaa);
1436 BAMBOO_DEBUGPRINT_REG(ptr);
1437 BAMBOO_DEBUGPRINT_REG(isize);
1438 BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1441 ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
1442 BAMBOO_CACHE_FLUSH_LINE(ptr);
1444 if(ptr + size > gcmarkedptrbound) {
1445 gcmarkedptrbound = ptr + size;
1446 } // if(ptr + size > gcmarkedptrbound)
1448 // ptr is not an active obj or has been marked
1450 } // if(isLarge(ptr, &type, &size)) else ...
1451 } /* can never reach here
1454 if(BAMBOO_NUM_OF_CORE == 0) {
1455 BAMBOO_DEBUGPRINT(0xbbbb);
1456 BAMBOO_DEBUGPRINT_REG(host);
1457 BAMBOO_DEBUGPRINT_REG(ptr);
1460 // check if this obj has been forwarded
1461 if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
1462 // send a msg to host informing that ptr is active
1463 send_msg_2(host, GCMARKEDOBJ, ptr, false);
1464 gcself_numsendobjs++;
1465 MGCHashadd(gcforwardobjtbl, (int)ptr);
1468 }// if(isLocal(ptr)) else ...*/
1469 } // if(ISSHAREDOBJ(ptr))
1471 BAMBOO_DEBUGPRINT(0xed06);
1475 // scan all pointers in ptr
1476 unsigned INTPTR * pointer;
1477 pointer=pointerarray[type];
1479 /* Array of primitives */
1481 } else if (((INTPTR)pointer)==1) {
1482 /* Array of pointers */
1483 struct ArrayObject *ao=(struct ArrayObject *) ptr;
1484 int length=ao->___length___;
1486 for(j=0; j<length; j++) {
1488 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1492 INTPTR size=pointer[0];
1494 for(i=1; i<=size; i++) {
1495 unsigned int offset=pointer[i];
1496 void * objptr=*((void **)(((char *)ptr)+offset));
1499 } // if (pointer==0) else if ... else ...
1501 } // while(gc_moreItems2())
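// The per-type pointer map decoded above (as used by this file):
// pointerarray[type] is 0 for pointer-free objects, 1 for an array of
// pointers, and otherwise a table whose word 0 is the number of pointer
// fields followed by their byte offsets. Illustrative only, not compiled.
#if 0
static void example_scan_fields(void * ptr, int type) {
  unsigned INTPTR * pointer = pointerarray[type];
  if(pointer == 0) {
    // array of primitives: nothing to scan
  } else if(((INTPTR)pointer) == 1) {
    struct ArrayObject * ao = (struct ArrayObject *)ptr;
    for(int j = 0; j < ao->___length___; j++) {
      markObj(((void **)(((char *)&ao->___length___) + sizeof(int)))[j]);
    }
  } else {
    for(INTPTR i = 1; i <= pointer[0]; i++) {
      markObj(*((void **)(((char *)ptr) + pointer[i])));
    }
  }
}
#endif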
1503 BAMBOO_DEBUGPRINT(0xed07);
1505 gcbusystatus = false;
1506 // send mark finish msg to core coordinator
1507 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1509 BAMBOO_DEBUGPRINT(0xed08);
1511 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1512 gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
1513 gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
1514 gcself_numreceiveobjs;
1515 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1519 BAMBOO_DEBUGPRINT(0xed09);
1521 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1522 gcself_numsendobjs, gcself_numreceiveobjs, false);
1525 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1527 BAMBOO_DEBUGPRINT(0xed0a);
1530 if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1532 BAMBOO_DEBUGPRINT(0xed0b);
1536 } // while(MARKPHASE == gcphase)
1541 inline void compact2Heaptophelper_I(int coren,
1546 int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1547 if(STARTUPCORE == coren) {
1549 gcmovestartaddr = *p;
1550 gcdstcore = gctopcore;
1551 gcblock2fill = *numblocks + 1;
1553 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
1556 BAMBOO_DEBUGPRINT_REG(coren);
1557 BAMBOO_DEBUGPRINT_REG(gctopcore);
1558 BAMBOO_DEBUGPRINT_REG(*p);
1559 BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1561 if(memneed < *remain) {
1563 BAMBOO_DEBUGPRINT(0xd104);
1566 gcrequiredmems[coren] = 0;
1567 gcloads[gctopcore] += memneed;
1568 *remain = *remain - memneed;
1571 BAMBOO_DEBUGPRINT(0xd105);
1573 // next available block
1575 gcfilledblocks[gctopcore] += 1;
1577 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1578 gcloads[gctopcore] = newbase;
1579 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1580 gcstopblock[gctopcore]++;
1581 gctopcore = NEXTTOPCORE(gctopblock);
1583 *numblocks = gcstopblock[gctopcore];
1584 *p = gcloads[gctopcore];
1586 *remain=(b<NUMCORES4GC) ?
1587 ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1588 : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1590 BAMBOO_DEBUGPRINT(0xd106);
1591 BAMBOO_DEBUGPRINT_REG(gctopcore);
1592 BAMBOO_DEBUGPRINT_REG(*p);
1593 BAMBOO_DEBUGPRINT_REG(b);
1594 BAMBOO_DEBUGPRINT_REG(*remain);
1596 } // if(memneed < *remain)
1598 } // void compact2Heaptophelper_I(int, int*, int*, int*)
1600 inline void compact2Heaptop() {
1601 // no cores have spare mem and some cores are blocked with pending moves:
1602 // find the current heap top and make them move to the heap top
1604 int numblocks = gcfilledblocks[gctopcore];
1605 //BASEPTR(gctopcore, numblocks, &p);
1606 p = gcloads[gctopcore];
1609 int remain = (b<NUMCORES4GC) ?
1610 ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1611 : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1612 // check if the top core has finished
1613 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1614 if(gccorestatus[gctopcore] != 0) {
1616 BAMBOO_DEBUGPRINT(0xd101);
1617 BAMBOO_DEBUGPRINT_REG(gctopcore);
1619 // let the top core finish its own work first
1620 compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
1621 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1624 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1627 BAMBOO_DEBUGPRINT(0xd102);
1628 BAMBOO_DEBUGPRINT_REG(gctopcore);
1629 BAMBOO_DEBUGPRINT_REG(p);
1630 BAMBOO_DEBUGPRINT_REG(b);
1631 BAMBOO_DEBUGPRINT_REG(remain);
1633 for(int i = 0; i < NUMCORES4GC; i++) {
1634 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1635 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1637 BAMBOO_DEBUGPRINT(0xd103);
1639 compact2Heaptophelper_I(i, &p, &numblocks, &remain);
1640 if(gccorestatus[gctopcore] != 0) {
1642 BAMBOO_DEBUGPRINT(0xd101);
1643 BAMBOO_DEBUGPRINT_REG(gctopcore);
1645 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1646 // the top core is not free now
1649 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1650 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1651 } // for(i = 0; i < NUMCORES4GC; i++)
1653 BAMBOO_DEBUGPRINT(0xd106);
1655 } // void compact2Heaptop()
1657 inline void resolvePendingMoveRequest() {
1659 BAMBOO_DEBUGPRINT(0xeb01);
1662 BAMBOO_DEBUGPRINT(0xeeee);
1663 for(int k = 0; k < NUMCORES4GC; k++) {
1664 BAMBOO_DEBUGPRINT(0xf000+k);
1665 BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1666 BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1667 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1668 BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1670 BAMBOO_DEBUGPRINT(0xffff);
1674 bool nosparemem = true;
1675 bool haspending = false;
1676 bool hasrunning = false;
1677 bool noblock = false;
1678 int dstcore = 0; // the core that needs spare mem
1679 int sourcecore = 0; // the core that has spare mem
1680 for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
1682 // check if there are cores with spare mem
1683 if(gccorestatus[i] == 0) {
1684 // finished working, check if it still has spare mem
1685 if(gcfilledblocks[i] < gcstopblock[i]) {
1686 // still have spare mem
1689 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1694 if(gccorestatus[j] != 0) {
1695 // not finished, check if it has pending move requests
1696 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1701 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1702 } // if(gccorestatus[i] == 0) else ...
1704 } // if(!haspending)
1705 if(!nosparemem && haspending) {
1709 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1710 gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
1711 gcrequiredmems[dstcore],
1714 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1716 BAMBOO_DEBUGPRINT(0xeb02);
1717 BAMBOO_DEBUGPRINT_REG(sourcecore);
1718 BAMBOO_DEBUGPRINT_REG(dstcore);
1719 BAMBOO_DEBUGPRINT_REG(startaddr);
1720 BAMBOO_DEBUGPRINT_REG(tomove);
1722 if(STARTUPCORE == dstcore) {
1724 BAMBOO_DEBUGPRINT(0xeb03);
1726 gcdstcore = sourcecore;
1728 gcmovestartaddr = startaddr;
1729 gcblock2fill = tomove;
1732 BAMBOO_DEBUGPRINT(0xeb04);
1734 send_msg_4(dstcore, GCMOVESTART, sourcecore,
1735 startaddr, tomove, false);
1742 } // for(i = 0; i < NUMCORES4GC; i++)
1744 BAMBOO_DEBUGPRINT(0xcccc);
1745 BAMBOO_DEBUGPRINT_REG(hasrunning);
1746 BAMBOO_DEBUGPRINT_REG(haspending);
1747 BAMBOO_DEBUGPRINT_REG(noblock);
1750 if(!hasrunning && !noblock) {
1751 gcphase = SUBTLECOMPACTPHASE;
1755 } // void resolvePendingMoveRequest()
1758 int numblocks; // block num for heap
1759 INTPTR base; // base virtual address of current heap block
1760 INTPTR ptr; // virtual address of current heap top
1761 int offset; // offset in current heap block
1762 int blockbase; // virtual address of current small block to check
1763 int blockbound; // bound virtual address of current small block
1764 int sblockindex; // index of the small blocks
1765 int top; // real size of current heap block to check
1766 int bound; // bound size of current heap block to check
1767 }; // struct moveHelper
1769 // If out of the boundary of valid shared memory, return false; else return true
1770 inline bool nextSBlock(struct moveHelper * orig) {
1771 orig->blockbase = orig->blockbound;
1772 bool sbchanged = false;
1774 BAMBOO_DEBUGPRINT(0xecc0);
1775 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1776 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1777 BAMBOO_DEBUGPRINT_REG(orig->bound);
1778 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1781 // check if we crossed into a big block
1782 // TODO we do not zero out the whole memory now; maybe the last two conditions
1784 if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
1785 || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
1786 || ((*((int*)orig->blockbase))==0)) {
1788 // end of current heap block, jump to next one
1791 BAMBOO_DEBUGPRINT(0xecc1);
1792 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1794 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1796 BAMBOO_DEBUGPRINT(orig->base);
1798 if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
1800 orig->ptr = orig->base; // set current ptr to out of boundary too
1803 //orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1804 orig->blockbase = orig->base;
1805 orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
1808 BLOCKINDEX(orig->base, &blocknum);
1809 if(bamboo_smemtbl[blocknum] == 0) {
1811 goto innernextSBlock;
1813 // check the bamboo_smemtbl to decide the real bound
1814 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1815 } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
1816 orig->sblockindex += 1;
1818 } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1820 // check if this sblock should be skipped or has a special start point
1821 if(gcsbstarttbl[orig->sblockindex] == -1) {
1824 BAMBOO_DEBUGPRINT(0xecc2);
1826 orig->sblockindex += 1;
1827 orig->blockbase += BAMBOO_SMEM_SIZE;
1828 goto outernextSBlock;
1829 } else if((gcsbstarttbl[orig->sblockindex] != 0)
1831 // the first time accessing this sblock
1833 BAMBOO_DEBUGPRINT(0xecc3);
1835 // data does not start from the very beginning
1836 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1837 } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1839 // setup information for this sblock
1840 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1841 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1842 orig->ptr = orig->blockbase + orig->offset;
1844 BAMBOO_DEBUGPRINT(0xecc4);
1845 BAMBOO_DEBUGPRINT_REG(orig->base);
1846 BAMBOO_DEBUGPRINT_REG(orig->bound);
1847 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1848 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1849 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1850 BAMBOO_DEBUGPRINT_REG(orig->offset);
1852 if(orig->ptr >= orig->bound) {
1853 // met a lobj, move to next block
1854 goto innernextSBlock;
1858 } // bool nextSBlock(struct moveHelper * orig)
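// Semantics of gcsbstarttbl[] as consumed by nextSBlock() above and written
// by moveLObjs() earlier:
//   -1        -> the sblock lies entirely inside a large object: skip it;
//    0        -> ordinary sblock, small-object data starts at its base;
//   otherwise -> a large object ends inside this sblock, and small-object
//                data starts at the stored address instead.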
1860 // return false if there is no data available to compact
1861 inline bool initOrig_Dst(struct moveHelper * orig,
1862 struct moveHelper * to) {
1865 to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1866 to->bound = BAMBOO_SMEM_SIZE_L;
1867 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1870 BAMBOO_DEBUGPRINT(0xef01);
1871 BAMBOO_DEBUGPRINT_REG(to->base);
1873 to->ptr = to->base + to->offset;
1874 #ifdef GC_CACHE_ADAPT
1875 // initialize the gc_cache_revise_infomation record
1876 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1877 gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
1878 ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
1879 gc_cache_revise_infomation.to_page_index =
1880 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
1881 gc_cache_revise_infomation.orig_page_start_va = -1;
1882 #endif // GC_CACHE_ADAPT
1884 // init the orig ptr
1885 orig->numblocks = 0;
1886 orig->base = to->base;
1888 BLOCKINDEX(orig->base, &blocknum);
1889 // check the bamboo_smemtbl to decide the real bound
1890 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1891 orig->blockbase = orig->base;
1892 orig->sblockindex = (orig->base - gcbaseva) / BAMBOO_SMEM_SIZE;
1894 BAMBOO_DEBUGPRINT(0xef02);
1895 BAMBOO_DEBUGPRINT_REG(orig->base);
1896 BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1897 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1898 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1901 if(gcsbstarttbl[orig->sblockindex] == -1) {
1903 BAMBOO_DEBUGPRINT(0xef03);
1907 gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1908 return nextSBlock(orig);
1909 } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1911 BAMBOO_DEBUGPRINT(0xef04);
1913 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1916 BAMBOO_DEBUGPRINT(0xef05);
1918 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1919 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1920 orig->ptr = orig->blockbase + orig->offset;
1922 BAMBOO_DEBUGPRINT(0xef06);
1923 BAMBOO_DEBUGPRINT_REG(orig->base);
1927 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
1929 inline void nextBlock(struct moveHelper * to) {
1930 to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1931 to->bound += BAMBOO_SMEM_SIZE;
1933 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1934 to->offset = BAMBOO_CACHE_LINE_SIZE;
1935 to->ptr = to->base + to->offset;
1936 } // void nextBlock(struct moveHelper * to)
1938 // endaddr does not contain spaces for headers
1939 inline bool moveobj(struct moveHelper * orig,
1940 struct moveHelper * to,
1942 if(stopblock == 0) {
1947 BAMBOO_DEBUGPRINT(0xe201);
1948 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 BAMBOO_DEBUGPRINT_REG(to->ptr);
1957 while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1958 orig->ptr = (int*)(orig->ptr) + 1;
1960 #ifdef GC_CACHE_ADAPT
1961 if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
1962 // end of an orig page:
1963 // compute the impact of this page on the new page
1964 int tmp_factor = to->ptr-gc_cache_revise_infomation.to_page_start_va;
1965 int topage=gc_cache_revise_infomation.to_page_index;
1966 int oldpage = gc_cache_revise_infomation.orig_page_index;
1967 int * newtable=&gccachesamplingtbl_r[topage];
1968 int * oldtable=&gccachesamplingtbl[oldpage];
1970 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
1971 (*newtable) += (*oldtable)*tmp_factor;
1972 newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
1973 oldtable=(int*)(((char *)oldtable)+size_cachesamplingtbl_local);
1975 // prepare for a new orig page
1976 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
1977 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
1978 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
1979 (BAMBOO_PAGE_SIZE)*(tmp_index+1);
1980 gc_cache_revise_infomation.orig_page_index = tmp_index;
1981 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1984 if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
1985 if(!nextSBlock(orig)) {
1986 // finished, no more data
1992 BAMBOO_DEBUGPRINT(0xe202);
1993 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1994 BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
1996 // check the obj's type, size and mark flag
1997 type = ((int *)(orig->ptr))[0];
2000 // end of this block, go to next one
2001 if(!nextSBlock(orig)) {
2002 // finished, no more data
2006 } else if(type < NUMCLASSES) {
2008 size = classsize[type];
2011 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
2012 int elementsize=classsize[type];
2013 int length=ao->___length___;
2014 size=sizeof(struct ArrayObject)+length*elementsize;
2016 mark = ((int *)(orig->ptr))[6];
2017 bool isremote = ((((int *)(orig->ptr))[6] & REMOTEM) != 0);
2019 BAMBOO_DEBUGPRINT(0xe203);
2020 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2021 BAMBOO_DEBUGPRINT_REG(size);
2023 ALIGNSIZE(size, &isize); // whether the obj is marked or not,
2024 // we must be able to skip across it
2025 if((mark & MARKED) != 0) {
2027 BAMBOO_DEBUGPRINT(0xe204);
2032 // marked obj, copy it to current heap top
2033 // check to see if remaining space is enough
2034 if(to->top + isize > to->bound) {
2035 // fill 0 indicating the end of this block
2036 BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top);
2037 // fill the header of this block and then go to next block
2038 to->offset += to->bound - to->top;
2039 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2040 (*((int*)(to->base))) = to->offset;
2041 #ifdef GC_CACHE_ADAPT
2042 int tmp_ptr = to->ptr;
2043 #endif // GC_CACHE_ADAPT
2045 #ifdef GC_CACHE_ADAPT
2046 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2047 // end of a to page, wrap up its information
2048 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2049 int topage=gc_cache_revise_infomation.to_page_index;
2050 int oldpage = gc_cache_revise_infomation.orig_page_index;
2051 int * newtable=&gccachesamplingtbl_r[topage];
2052 int * oldtable=&gccachesamplingtbl[oldpage];
2054 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2055 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2056 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2057 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2059 // prepare for a new to page
2060 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2061 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2062 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2063 (BAMBOO_PAGE_SIZE)*(tmp_index+1);
2064 gc_cache_revise_infomation.orig_page_index = tmp_index;
2065 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2066 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2067 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2068 gc_cache_revise_infomation.to_page_index =
2069 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2071 #endif // GC_CACHE_ADAPT
2072 if(stopblock == to->numblocks) {
2073 // already filled the block
2075 } // if(stopblock == to->numblocks)
2076 } // if(to->top + isize > to->bound)
2077 // set the mark field to COMPACTED, indicating that this obj has been
2078 // moved and needs to be flushed
2079 ((int *)(orig->ptr))[6] = COMPACTED;
2080 if(to->ptr != orig->ptr) {
2081 if((int)(orig->ptr) < (int)(to->ptr)+size) {
2082 memmove(to->ptr, orig->ptr, size);
2084 //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
2085 memcpy(to->ptr, orig->ptr, size);
2087 // fill the remaining space with -2
2088 BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
2090 // store mapping info
2091 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2092 #ifdef LOCALHASHTBL_TEST
2093 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2095 mgchashInsert_I(gcpointertbl, orig->ptr, to->ptr);
2097 //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2099 // add to the sharedptbl
2100 if(gcsharedptbl != NULL) {
2101 //GCSharedHashadd_I(gcsharedptbl, orig->ptr, to->ptr);
2102 mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
2105 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2108 BAMBOO_DEBUGPRINT(0xcdce);
2109 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2110 BAMBOO_DEBUGPRINT_REG(to->ptr);
2111 BAMBOO_DEBUGPRINT_REG(isize);
2113 gccurr_heaptop -= isize;
2115 to->offset += isize;
2118 #ifdef GC_CACHE_ADAPT
2119 int tmp_ptr = to->ptr;
2120 #endif // GC_CACHE_ADAPT
2121 if(to->top == to->bound) {
2122 // fill the header of this block and then go to next block
2123 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2124 (*((int*)(to->base))) = to->offset;
2126 #ifdef GC_CACHE_ADAPT
2127 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2128 // end of a to page, wrap up its information
2129 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2130 int topage=gc_cache_revise_infomation.to_page_index;
2131 int oldpage = gc_cache_revise_infomation.orig_page_index;
2132 int * newtable=&gccachesamplingtbl_r[topage];
2133 int * oldtable=&gccachesamplingtbl[oldpage];
2135 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2136 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2137 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2138 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2140 // prepare for a new to page
2141 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2142 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2143 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2144 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2145 gc_cache_revise_infomation.orig_page_index =
2146 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2147 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2148 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2149 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2150 gc_cache_revise_infomation.to_page_index =
2151 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2153 #endif // GC_CACHE_ADAPT
2158 BAMBOO_DEBUGPRINT(0xe205);
2164 BAMBOO_DEBUGPRINT_REG(isize);
2165 BAMBOO_DEBUGPRINT_REG(size);
2166 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2167 BAMBOO_DEBUGPRINT_REG(orig->bound);
2169 if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
2171 BAMBOO_DEBUGPRINT(0xe206);
2173 if(!nextSBlock(orig)) {
2174 // finished, no more data
2179 BAMBOO_DEBUGPRINT(0xe207);
2180 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2183 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
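// Sketch of the GC_CACHE_ADAPT bookkeeping inside moveobj(): whenever the
// orig or the to cursor crosses a page boundary, the per-core sampling
// counts of the old page are credited to the destination page, weighted by
// how many bytes landed there, i.e. for every core
//     newtable[core] += oldtable[core] * bytes_copied_into_to_page;
// the running gc_cache_revise_infomation record tracks the current orig/to
// page bounds so each page is folded in exactly once.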
2185 // should be invoked with interrupts disabled
2186 inline int assignSpareMem_I(int sourcecore,
2191 BLOCKINDEX(gcloads[sourcecore], &b);
2192 int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
2193 : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2194 int remain = boundptr - gcloads[sourcecore];
2195 int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2196 *startaddr = gcloads[sourcecore];
2197 *tomove = gcfilledblocks[sourcecore] + 1;
2198 if(memneed < remain) {
2199 gcloads[sourcecore] += memneed;
2202 // next available block
2203 gcfilledblocks[sourcecore] += 1;
2205 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2206 gcloads[sourcecore] = newbase;
2207 return requiredmem-remain;
2209 } // int assignSpareMem_I(int, int *, int *, int *)
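// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the block-bound
// arithmetic used by assignSpareMem_I: the heap begins with NUMCORES4GC large
// blocks of size BAMBOO_SMEM_SIZE_L, followed by blocks of BAMBOO_SMEM_SIZE,
// with BAMBOO_LARGE_SMEM_BOUND marking the boundary. The EX_ sizes below are
// made-up example values, not the runtime's configuration.
#if 0
#include <stdio.h>

#define EX_NUMCORES4GC 4
#define EX_SMEM_SIZE_L (64*1024)
#define EX_SMEM_SIZE   (16*1024)
#define EX_LARGE_SMEM_BOUND (EX_NUMCORES4GC*EX_SMEM_SIZE_L)

// Offset (from the heap base) of the first byte past block b.
static int ex_block_bound(int b) {
  return (b < EX_NUMCORES4GC)
         ? ((b+1)*EX_SMEM_SIZE_L)
         : (EX_LARGE_SMEM_BOUND + (b-EX_NUMCORES4GC+1)*EX_SMEM_SIZE);
}

int main(void) {
  printf("%d\n", ex_block_bound(0));   /* 65536: end of the 1st large block */
  printf("%d\n", ex_block_bound(4));   /* 278528: end of the 1st small block */
  return 0;
}
#endif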
2211 // should be invoked with interrupts disabled
2212 inline bool gcfindSpareMem_I(int * startaddr,
2217 for(int k = 0; k < NUMCORES4GC; k++) {
2218 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2219 // check if this stopped core has enough mem
2220 assignSpareMem_I(k, requiredmem, tomove, startaddr);
2225 // if no spare mem can be found right now, hold the request
2226 gcrequiredmems[requiredcore] = requiredmem;
2229 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
2231 inline bool compacthelper(struct moveHelper * orig,
2232 struct moveHelper * to,
2235 bool * localcompact) {
2236 // scan over all objs in this block and compact the marked objs;
2237 // the loop stops when either all live objs have been scanned or
2238 // the gcstopblock quota has been filled
2240 BAMBOO_DEBUGPRINT(0xe101);
2241 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2242 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2245 while(orig->ptr < gcmarkedptrbound) {
2246 bool stop = moveobj(orig, to, gcblock2fill);
2251 #ifdef GC_CACHE_ADAPT
2252 // end of a to-page; wrap up its information
2253 int tmp_factor = to->ptr-gc_cache_revise_infomation.to_page_start_va;
2254 int topage=gc_cache_revise_infomation.to_page_index;
2255 int oldpage = gc_cache_revise_infomation.orig_page_index;
2256 int * newtable=&gccachesamplingtbl_r[topage];
2257 int * oldtable=&gccachesamplingtbl[oldpage];
2259 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2260 (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
2261 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2262 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2264 #endif // GC_CACHE_ADAPT
2265 // if no objs have been compacted, do nothing;
2266 // otherwise, fill the header of this block
2267 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2268 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2269 (*((int*)(to->base))) = to->offset;
2273 to->top -= BAMBOO_CACHE_LINE_SIZE;
2274 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2276 *heaptopptr = to->ptr;
2277 *filledblocks = to->numblocks;
2280 BAMBOO_DEBUGPRINT(0xe102);
2281 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2282 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2283 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2284 BAMBOO_DEBUGPRINT_REG(*filledblocks);
2285 BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2288 // send msgs to the core coordinator indicating that compaction is finishing
2289 // send compact finish message to core coordinator
2290 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2291 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2292 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2293 if(orig->ptr < gcmarkedptrbound) {
2295 BAMBOO_DEBUGPRINT(0xe103);
2299 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2300 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2301 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2303 BAMBOO_DEBUGPRINT(0xe104);
2307 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2309 BAMBOO_DEBUGPRINT(0xe105);
2313 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2316 BAMBOO_DEBUGPRINT(0xe106);
2318 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2323 if(orig->ptr < gcmarkedptrbound) {
2325 BAMBOO_DEBUGPRINT(0xe107);
2329 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2330 *filledblocks, *heaptopptr, gccurr_heaptop, false);
2333 BAMBOO_DEBUGPRINT(0xe108);
2334 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2336 // finish compacting
2337 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2338 *filledblocks, *heaptopptr, 0, false);
2340 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2342 if(orig->ptr < gcmarkedptrbound) {
2344 BAMBOO_DEBUGPRINT(0xe109);
2346 // still have uncompacted objs
2355 BAMBOO_DEBUGPRINT(0xe10a);
2358 to->ptr = gcmovestartaddr;
2359 to->numblocks = gcblock2fill - 1;
2360 to->bound = (to->numblocks==0) ?
2361 BAMBOO_SMEM_SIZE_L :
2362 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2363 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2364 to->offset = to->ptr - to->base;
2365 to->top = (to->numblocks==0) ?
2366 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2368 to->offset = BAMBOO_CACHE_LINE_SIZE;
2369 to->ptr += to->offset; // for header
2370 to->top += to->offset;
2371 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2372 *localcompact = true;
2374 *localcompact = false;
2376 #ifdef GC_CACHE_ADAPT
2377 // initialize the gc_cache_revise_information
2378 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2379 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2380 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2381 gc_cache_revise_infomation.to_page_index =
2382 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2383 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2384 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2385 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2386 gc_cache_revise_infomation.orig_page_index =
2387 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2388 #endif // GC_CACHE_ADAPT
2392 BAMBOO_DEBUGPRINT(0xe10b);
2395 } // bool compacthelper(...)
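// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the destination
// set-up that compacthelper performs when the master hands this core a move
// target (gcmovestartaddr, gcblock2fill): bound/top are derived from the
// number of blocks already filled, then a cache line is reserved for the
// block header. The ex_ names and sizes are assumptions for the example.
#if 0
#define EX_SMEM_SIZE_L (64*1024)
#define EX_SMEM_SIZE   (16*1024)
#define EX_CACHE_LINE_SIZE 64

struct ex_dst { int ptr, base, bound, top, offset, numblocks; };

static void ex_setup_dst(struct ex_dst * to, int movestart, int block2fill,
                         int corebase /* BASEPTR(dstcore, numblocks) */) {
  to->ptr = movestart;
  to->numblocks = block2fill - 1;
  to->bound = (to->numblocks == 0)
              ? EX_SMEM_SIZE_L
              : EX_SMEM_SIZE_L + EX_SMEM_SIZE*to->numblocks;
  to->base = corebase;
  to->offset = to->ptr - to->base;
  to->top = (to->numblocks == 0)
            ? to->offset : (to->bound - EX_SMEM_SIZE + to->offset);
  to->offset = EX_CACHE_LINE_SIZE;     /* reserve room for the block header */
  to->ptr += to->offset;
  to->top += to->offset;
}
#endif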
2397 inline void compact() {
2398 if(COMPACTPHASE != gcphase) {
2399 BAMBOO_EXIT(0xb102);
2402 // initialize pointers for compacting
2403 struct moveHelper * orig =
2404 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2405 struct moveHelper * to =
2406 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2408 if(!initOrig_Dst(orig, to)) {
2409 // no available data to compact
2410 // send compact finish msg to STARTUP core
2412 BAMBOO_DEBUGPRINT(0xe001);
2413 BAMBOO_DEBUGPRINT_REG(to->base);
2415 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2416 0, to->base, 0, false);
2421 #ifdef GC_CACHE_ADAPT
2422 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2423 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2424 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2425 gc_cache_revise_infomation.orig_page_index =
2426 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2427 #endif // GC_CACHE_ADAPT
2429 int filledblocks = 0;
2430 INTPTR heaptopptr = 0;
2431 bool localcompact = true;
2432 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2438 // if it returns NULL, it means either
2439 // 1. objptr is NULL, or
2440 // 2. objptr is not a shared obj;
2441 // in these cases, keeping the original value is OK
2442 inline void * flushObj(void * objptr) {
2444 BAMBOO_DEBUGPRINT(0xe401);
2446 if(objptr == NULL) {
2449 void * dstptr = NULL;
2450 if(ISSHAREDOBJ(objptr)) {
2452 BAMBOO_DEBUGPRINT(0xe402);
2453 BAMBOO_DEBUGPRINT_REG(objptr);
2455 // a shared obj ptr, change to new address
2456 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2458 //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
2460 #ifdef LOCALHASHTBL_TEST
2461 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2463 dstptr = mgchashSearch(gcpointertbl, objptr);
2465 //MGCHashget(gcpointertbl, objptr, &dstptr);
2467 //flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
2469 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2471 BAMBOO_DEBUGPRINT_REG(dstptr);
2474 if(NULL == dstptr) {
2477 BAMBOO_DEBUGPRINT(0xe403);
2478 BAMBOO_DEBUGPRINT_REG(objptr);
2479 BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2481 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2482 // error! the obj is hosted on this core, but its mapping cannot be found
2483 //BAMBOO_DEBUGPRINT(0xecec);
2484 BAMBOO_DEBUGPRINT_REG(objptr);
2485 BAMBOO_EXIT(0xb103);
2486 // assume that the obj has not been moved, use the original address
2489 int hostc = hostcore(objptr);
2491 //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
2493 // check the corresponding sharedptbl
2494 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2495 //struct GCSharedHash * sptbl = gcrpointertbls[hostcore(objptr)];
2496 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2498 //GCSharedHashget(sptbl, (int)objptr, &dstptr);
2499 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2500 if(dstptr != NULL) {
2501 #ifdef LOCALHASHTBL_TEST
2502 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2504 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2508 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2510 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
2513 if(dstptr == NULL) {
2514 // still cannot get the mapping info;
2515 // send a msg to the host core for it
2516 gcobj2map = (int)objptr;
2519 // the first time requesting this mapping: send a msg to the host core
2520 // for the mapping info
2521 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2522 BAMBOO_NUM_OF_CORE, false);
2529 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
2531 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2532 #ifdef LOCALHASHTBL_TEST
2533 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2535 dstptr = mgchashSearch(gcpointertbl, objptr);
2537 //MGCHashget(gcpointertbl, objptr, &dstptr);
2538 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2539 } // if(dstptr == NULL)
2540 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2542 BAMBOO_DEBUGPRINT_REG(dstptr);
2544 } // if(NULL == dstptr)
2545 } // if(ISSHAREDOBJ(objptr))
2546 // if not a shared obj, return NULL to indicate no need to flush
2548 BAMBOO_DEBUGPRINT(0xe404);
2551 } // void * flushObj(void * objptr)
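// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of flushObj's
// lookup chain, with hypothetical callbacks standing in for gcpointertbl and
// the per-host gcrpointertbls: try the core-local table first, then the
// owner's published shared table (caching a hit locally), and only then pay
// for a GCMAPREQUEST round trip to the host core.
#if 0
typedef void * (*ex_lookup_fn)(void * key);

static void * ex_resolve(void * obj,
                         ex_lookup_fn local_get,
                         ex_lookup_fn shared_get,
                         void (*local_put)(void * key, void * val),
                         void * (*ask_host)(void * key) /* blocking msg */) {
  void * dst = local_get(obj);         /* core-local mapping table */
  if(dst == 0) {
    dst = shared_get(obj);             /* host core's shared table */
    if(dst != 0) {
      local_put(obj, dst);             /* cache for later lookups */
    } else {
      dst = ask_host(obj);             /* GCMAPREQUEST round trip */
    }
  }
  return dst;
}
#endif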
2553 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2555 // flush current stack
2556 while(stackptr!=NULL) {
2557 for(i=0; i<stackptr->size; i++) {
2558 if(stackptr->array[i] != NULL) {
2559 void * dst = flushObj(stackptr->array[i]);
2561 stackptr->array[i] = dst;
2565 stackptr=stackptr->next;
2569 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2570 for(i=0; i<NUMCLASSES; i++) {
2571 struct parameterwrapper ** queues =
2572 objectqueues[BAMBOO_NUM_OF_CORE][i];
2573 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2574 for(j = 0; j < length; ++j) {
2575 struct parameterwrapper * parameter = queues[j];
2576 struct ObjectHash * set=parameter->objectset;
2577 struct ObjectNode * ptr=set->listhead;
2579 void * dst = flushObj((void *)ptr->key);
2585 ObjectHashrehash(set);
2590 // flush current task descriptor
2591 if(currtpd != NULL) {
2592 for(i=0; i<currtpd->numParameters; i++) {
2593 void * dst = flushObj(currtpd->parameterArray[i]);
2595 currtpd->parameterArray[i] = dst;
2600 // flush active tasks
2601 if(activetasks != NULL) {
2602 struct genpointerlist * ptr=activetasks->list;
2604 struct taskparamdescriptor *tpd=ptr->src;
2606 for(i=0; i<tpd->numParameters; i++) {
2607 void * dst = flushObj(tpd->parameterArray[i]);
2609 tpd->parameterArray[i] = dst;
2614 genrehash(activetasks);
2617 // flush cached transferred obj
2618 struct QueueItem * tmpobjptr = getHead(&objqueue);
2619 while(tmpobjptr != NULL) {
2620 struct transObjInfo * objInfo =
2621 (struct transObjInfo *)(tmpobjptr->objectptr);
2622 void * dst = flushObj(objInfo->objptr);
2624 objInfo->objptr = dst;
2626 tmpobjptr = getNextQueueItem(tmpobjptr);
2629 // flush cached objs to be transferred
2630 struct QueueItem * item = getHead(totransobjqueue);
2631 while(item != NULL) {
2632 struct transObjInfo * totransobj =
2633 (struct transObjInfo *)(item->objectptr);
2634 void * dst = flushObj(totransobj->objptr);
2636 totransobj->objptr = dst;
2638 item = getNextQueueItem(item);
2639 } // while(item != NULL)
2641 // flush lock-related info
2642 for(i = 0; i < runtime_locklen; ++i) {
2643 void * dst = flushObj(runtime_locks[i].redirectlock);
2645 runtime_locks[i].redirectlock = (int)dst;
2647 if(runtime_locks[i].value != NULL) {
2648 void * dst=flushObj(runtime_locks[i].value);
2650 runtime_locks[i].value = (int)dst;
2655 } // void flushRuntimeObj(struct garbagelist * stackptr)
2657 inline void transmappinginfo() {
2658 // broadcast the sharedptbl pointer
2659 for(int i = 0; i < NUMCORESACTIVE; i++) {
2660 if(i != BAMBOO_NUM_OF_CORE) {
2661 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2665 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2666 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
2670 inline void flush(struct garbagelist * stackptr) {
2672 flushRuntimeObj(stackptr);
2675 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2676 bool hasItems = gc_moreItems_I();
2677 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2683 BAMBOO_DEBUGPRINT(0xe301);
2685 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2686 void * ptr = gc_dequeue_I();
2687 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2688 if(ISSHAREDOBJ(ptr)) {
2689 // should be a local shared obj and should have mapping info
2690 ptr = flushObj(ptr);
2692 BAMBOO_DEBUGPRINT(0xe302);
2693 BAMBOO_DEBUGPRINT_REG(ptr);
2694 BAMBOO_DEBUGPRINT_REG(tptr);
2695 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2698 BAMBOO_EXIT(0xb105);
2700 } // if(ISSHAREDOBJ(ptr))
2701 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2702 int type = ((int *)(ptr))[0];
2703 // scan all pointers in ptr
2704 unsigned INTPTR * pointer;
2705 pointer=pointerarray[type];
2707 BAMBOO_DEBUGPRINT(0xe303);
2708 BAMBOO_DEBUGPRINT_REG(pointer);
2711 /* Array of primitives */
2713 } else if (((INTPTR)pointer)==1) {
2715 BAMBOO_DEBUGPRINT(0xe304);
2717 /* Array of pointers */
2718 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2719 int length=ao->___length___;
2721 for(j=0; j<length; j++) {
2723 BAMBOO_DEBUGPRINT(0xe305);
2726 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2728 BAMBOO_DEBUGPRINT_REG(objptr);
2730 if(objptr != NULL) {
2731 void * dst = flushObj(objptr);
2733 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2739 BAMBOO_DEBUGPRINT(0xe306);
2741 INTPTR size=pointer[0];
2743 for(i=1; i<=size; i++) {
2745 BAMBOO_DEBUGPRINT(0xe307);
2747 unsigned int offset=pointer[i];
2748 void * objptr=*((void **)(((char *)ptr)+offset));
2750 BAMBOO_DEBUGPRINT_REG(objptr);
2752 if(objptr != NULL) {
2753 void * dst = flushObj(objptr);
2755 *((void **)(((char *)ptr)+offset)) = dst;
2758 } // for(i=1; i<=size; i++)
2759 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2760 // restore the mark field, indicating that this obj has been flushed
2761 if(ISSHAREDOBJ(ptr)) {
2762 ((int *)(ptr))[6] = INIT;
2764 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2765 } // while(gc_moreItems())
2767 BAMBOO_DEBUGPRINT(0xe308);
2770 // TODO bug here: the startup core holds all lobjs' info, so all the
2771 // lobjs end up being flushed here in sequence.
2773 while(gc_lobjmoreItems_I()) {
2775 BAMBOO_DEBUGPRINT(0xe309);
2777 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2778 ptr = flushObj(ptr);
2780 BAMBOO_DEBUGPRINT(0xe30a);
2781 BAMBOO_DEBUGPRINT_REG(ptr);
2782 BAMBOO_DEBUGPRINT_REG(tptr);
2783 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2786 BAMBOO_EXIT(0xb106);
2788 if(((int *)(ptr))[6] == COMPACTED) {
2789 int type = ((int *)(ptr))[0];
2790 // scan all pointers in ptr
2791 unsigned INTPTR * pointer;
2792 pointer=pointerarray[type];
2794 BAMBOO_DEBUGPRINT(0xe30b);
2795 BAMBOO_DEBUGPRINT_REG(pointer);
2798 /* Array of primitives */
2800 } else if (((INTPTR)pointer)==1) {
2802 BAMBOO_DEBUGPRINT(0xe30c);
2804 /* Array of pointers */
2805 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2806 int length=ao->___length___;
2808 for(j=0; j<length; j++) {
2810 BAMBOO_DEBUGPRINT(0xe30d);
2813 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2815 BAMBOO_DEBUGPRINT_REG(objptr);
2817 if(objptr != NULL) {
2818 void * dst = flushObj(objptr);
2820 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2826 BAMBOO_DEBUGPRINT(0xe30e);
2828 INTPTR size=pointer[0];
2830 for(i=1; i<=size; i++) {
2832 BAMBOO_DEBUGPRINT(0xe30f);
2834 unsigned int offset=pointer[i];
2835 void * objptr=*((void **)(((char *)ptr)+offset));
2838 BAMBOO_DEBUGPRINT_REG(objptr);
2840 if(objptr != NULL) {
2841 void * dst = flushObj(objptr);
2843 *((void **)(((char *)ptr)+offset)) = dst;
2846 } // for(i=1; i<=size; i++)
2847 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2848 // restore the mark field, indicating that this obj has been flushed
2849 ((int *)(ptr))[6] = INIT;
2850 } // if(((int *)(ptr))[6] == COMPACTED)
2851 } // while(gc_lobjmoreItems())
2853 BAMBOO_DEBUGPRINT(0xe310);
2856 // send flush finish message to core coordinator
2857 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2858 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2860 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2863 BAMBOO_DEBUGPRINT(0xe311);
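// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the per-type
// layout table that flush() walks (pointerarray[type]): the value 0 means
// "array of primitives", 1 means "array of pointers", and anything else
// points at a table whose first word is the pointer-field count followed by
// the byte offsets of those fields. ex_update stands in for flushObj; the
// unsigned long element type is an assumption mirroring unsigned INTPTR.
#if 0
static void ex_scan_fields(void * obj, unsigned long * layout,
                           void * (*ex_update)(void *)) {
  if(layout == 0 || ((unsigned long)layout) == 1) {
    return;                            /* arrays are walked element-wise */
  }
  unsigned long nfields = layout[0];
  for(unsigned long i = 1; i <= nfields; i++) {
    unsigned int offset = (unsigned int)layout[i];
    void ** slot = (void **)(((char *)obj) + offset);
    if(*slot != 0) {
      void * dst = ex_update(*slot);
      if(dst != 0) {
        *slot = dst;                   /* rewrite to the post-compaction addr */
      }
    }
  }
}
#endif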
2867 #ifdef GC_CACHE_ADAPT
2868 // prepare for cache adaption:
2869 // -- flush the shared heap
2870 // -- clean dtlb entries
2871 // -- change cache strategy
2872 void cacheAdapt_gc(bool isgccachestage) {
2873 // flush the shared heap
2874 BAMBOO_CACHE_FLUSH_L2();
2876 // clean the dtlb entries
2877 BAMBOO_CLEAN_DTLB();
2879 // change the cache strategy
2880 gccachestage = isgccachestage;
2881 } // cacheAdapt_gc(bool isgccachestage)
2883 // the master core decides how to adapt the cache strategy for the mutator
2884 // according to the collected statistics
2886 // make all pages hash-for-home (h4h)
2887 int cacheAdapt_policy_h4h(){
2888 unsigned int page_index = 0;
2890 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2892 int * tmp_p = gccachepolicytbl+1;
2893 for(page_index = 0; page_index < page_num; page_index++) {
2894 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2895 bamboo_cache_policy_t policy = {0};
2896 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2897 *tmp_p = page_index;
2899 *tmp_p = policy.word;
2905 } // int cacheAdapt_policy_h4h()
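// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the policy-table
// format shared by the cacheAdapt_policy_* writers and the cacheAdapt_mutator
// reader below: word 0 holds the entry count, followed by
// (page_index, policy_word) pairs. ex_apply stands in for
// bamboo_adapt_cache_policy.
#if 0
static void ex_read_policy_tbl(int * tbl,
                               void (*ex_apply)(int page_index, int word)) {
  int numchanged = tbl[0];
  int * p = tbl + 1;
  while(numchanged--) {
    int page_index = *p++;             /* which page to re-map */
    int word = *p++;                   /* encoded bamboo_cache_policy_t */
    ex_apply(page_index, word);
  }
}
#endif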
2907 // cache every page locally on its host core (non-adaptive gc-local mode)
2908 int cacheAdapt_policy_local(){
2909 unsigned int page_index = 0;
2911 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2913 int * tmp_p = gccachepolicytbl+1;
2914 for(page_index = 0; page_index < page_num; page_index++) {
2915 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2916 bamboo_cache_policy_t policy = {0};
2918 BLOCKINDEX(page_sva, &block);
2919 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2920 // locally cache the page on its host core
2921 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2922 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2923 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2924 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2925 *tmp_p = page_index;
2927 *tmp_p = policy.word;
2933 } // int cacheAdapt_policy_local()
2935 int cacheAdapt_policy_hotest(){
2936 unsigned int page_index = 0;
2938 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2940 int * tmp_p = gccachepolicytbl+1;
2941 for(page_index = 0; page_index < page_num; page_index++) {
2942 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2943 bamboo_cache_policy_t policy = {0};
2947 for(int i = 0; i < NUMCORESACTIVE; i++) {
2948 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
2949 +size_cachesamplingtbl_local_r*i);
2950 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
2952 // check the frequency and decide if this page is hot for this core
2953 if(hotfreq < freq) {
2959 // Decide the cache strategy for this page.
2960 // If we decide to adopt a new cache strategy, write it into the shared
2961 // block of the gcsharedsamplingtbl; the memory reserved for recording
2962 // it is large enough to hold the information.
2963 // Format: page start va + cache strategy (h4h, or host core (x,y))
2965 // this page has not been accessed, do not change its cache policy
2968 // locally cache the page on the hottest core
2969 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2970 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2971 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2972 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2973 *tmp_p = page_index;
2975 *tmp_p = policy.word;
2982 } // int cacheAdapt_policy_hotest()
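// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the hottest-core
// scan above: the revised sampling table holds one row of per-page counters
// per core, rows laid back to back with stride row_bytes
// (size_cachesamplingtbl_local_r). Returns the core with the highest count
// for the page, or -1 if nobody touched it. The runtime additionally divides
// each counter by BAMBOO_PAGE_SIZE; that normalization is omitted here.
#if 0
static int ex_hottest_core(char * tbl, int row_bytes, int ncores,
                           int page_index, int * hotfreq_out) {
  int hotcore = -1, hotfreq = 0;
  for(int i = 0; i < ncores; i++) {
    int * row = (int *)(tbl + (long)row_bytes * i);
    int freq = row[page_index];
    if(freq > hotfreq) {
      hotfreq = freq;
      hotcore = i;
    }
  }
  *hotfreq_out = hotfreq;
  return hotcore;
}
#endif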
2984 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
2985 // cache the page on the core that accesses it the most if that core accesses
2986 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total; otherwise, hash the page for home.
2988 int cacheAdapt_policy_dominate(){
2989 unsigned int page_index = 0;
2991 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2993 int * tmp_p = gccachepolicytbl+1;
2994 for(page_index = 0; page_index < page_num; page_index++) {
2995 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2996 bamboo_cache_policy_t policy = {0};
3001 for(int i = 0; i < NUMCORESACTIVE; i++) {
3002 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3003 +size_cachesamplingtbl_local_r*i);
3004 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3007 // check the frequency and decide if this page is hot for this core
3008 if(hotfreq < freq) {
3013 // Decide the cache strategy for this page.
3014 // If we decide to adopt a new cache strategy, write it into the shared block.
3016 // Format: page start va + cache policy
3018 // this page has not been accessed, do not change its cache policy
3021 totalfreq = (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100;
3022 if(hotfreq < totalfreq) {
3024 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3026 // locally cache the page on the hottest core
3027 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3028 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3029 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3030 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3032 *tmp_p = page_index;
3034 *tmp_p = policy.word;
3040 } // int cacheAdapt_policy_dominate()
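// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the dominate
// test above: with the 50% threshold, a page with 1000 total accesses is
// pinned to its hottest core only if that core alone accounts for at least
// 500 of them; otherwise the page is hashed for home.
#if 0
static int ex_is_dominated(int hotfreq, int totalfreq, int threshold_pct) {
  return hotfreq >= (totalfreq * threshold_pct) / 100;
}
/* ex_is_dominated(600, 1000, 50) -> 1 (pin to the hottest core)
   ex_is_dominated(400, 1000, 50) -> 0 (hash for home)           */
#endif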
3042 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 20000
3044 void gc_quicksort(int *array,
3050 int rightIdx = right;
3051 if((right-left+1) >= 1) {
3052 pivot = (left+right)/2;
3053 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
3054 int pivotValue = array[pivot*3-offset];
3055 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
3058 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
3061 // swap [leftIdx] & [rightIdx]
3062 for(int k = 0; k < 3; k++) {
3063 int tmp = array[3*rightIdx-k];
3064 array[3*rightIdx-k] = array[3*leftIdx-k];
3065 array[3*leftIdx-k] = tmp;
3069 if((leftIdx-1) == pivot) {
3070 pivot = rightIdx = rightIdx + 1;
3071 } else if((leftIdx+1) == pivot) {
3072 pivot = leftIdx = leftIdx-1;
3075 gc_quicksort(array, left, pivot-1, offset);
3076 gc_quicksort(array, pivot+1, right, offset);
3079 } // void gc_quicksort(...)
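// ---------------------------------------------------------------------------
// Illustrative usage (excluded from the build via #if 0) of gc_quicksort on
// the core2heavypages layout: slot 0 holds the record count and record r
// (1-based) occupies slots 3r-2..3r as (policy-table ptr, totalfreq,
// remoteaccess). The offset argument selects the descending sort key:
// 0 sorts by remoteaccess (slot 3r), 1 by totalfreq (slot 3r-1).
#if 0
static void ex_sort_demo(void) {
  int pages[1 + 3*3];
  pages[0] = 3;                        /* three records */
  /*            ptr           totalfreq      remoteaccess */
  pages[1] = 0; pages[2] = 10; pages[3] = 2;
  pages[4] = 0; pages[5] = 50; pages[6] = 9;
  pages[7] = 0; pages[8] = 30; pages[9] = 4;
  gc_quicksort(pages, 1, pages[0], 0); /* remote keys now order 9, 4, 2 */
}
#endif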
3081 // Every page is cached on the core that accesses it the most.
3082 // Check to see if any core's pages total more accesses than threshold
3083 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3084 // most remote accesses and hash them for home until we get below
3085 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3086 int cacheAdapt_policy_overload(){
3087 unsigned int page_index = 0;
3089 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3091 int * tmp_p = gccachepolicytbl+1;
3092 unsigned long long workload[NUMCORESACTIVE];
3093 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3094 unsigned long long total_workload = 0;
3095 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3096 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3097 for(page_index = 0; page_index < page_num; page_index++) {
3098 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3099 bamboo_cache_policy_t policy = {0};
3104 for(int i = 0; i < NUMCORESACTIVE; i++) {
3105 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3106 +size_cachesamplingtbl_local_r*i);
3107 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3110 // check the frequency and decide if this page is hot for this core
3111 if(hotfreq < freq) {
3116 /*if(page_sva == 0x10e90000) {
3117 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3121 // Decide the cache strategy for this page.
3122 // If we decide to adopt a new cache strategy, write it into the shared
3123 // block of the gcsharedsamplingtbl; the memory reserved for recording
3124 // it is large enough to hold the information.
3125 // Format: page start va + cache strategy (h4h, or host core (x,y))
3127 // this page has not been accessed, do not change its cache policy
3130 // locally cache the page on the hottest core
3131 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3132 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3133 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3134 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3135 *tmp_p = page_index;
3137 *tmp_p = policy.word;
3140 workload[hotestcore] += totalfreq;
3141 total_workload += totalfreq;
3142 // record this page in core2heavypages (sorted later via gc_quicksort)
3143 int remoteaccess = totalfreq - hotfreq;
3144 int index = core2heavypages[hotestcore][0];
3145 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3146 core2heavypages[hotestcore][3*index+2] = totalfreq;
3147 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3148 core2heavypages[hotestcore][0]++;
3150 /*if(page_sva == 0x10f10000) {
3152 BLOCKINDEX(page_sva, &block);
3153 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3154 int coord_x = bamboo_cpu2coords[2*coren]+1;
3155 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3156 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
3160 int workload_threshold = total_workload / 10;
3161 // Check the workload of each core
3162 for(int i = 0; i < NUMCORESACTIVE; i++) {
3164 int index = core2heavypages[i][0];
3165 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3166 // sort in descending order of remote accesses
3167 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3168 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3169 // h4h those pages with the most remote accesses
3170 bamboo_cache_policy_t policy = {0};
3171 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3172 *((int*)core2heavypages[i][j]) = policy.word;
3173 workload[i] -= core2heavypages[i][j+1];
3180 } // int cacheAdapt_policy_overload()
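// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the overload
// trimming loop above: after sorting a core's heavy pages by remote accesses
// (descending), keep hashing the heaviest pages for home, deducting each
// page's total access count, until the core's workload drops below the
// threshold. ex_h4h stands in for rewriting the page's policy-table entry.
#if 0
static void ex_trim_overload(int * pages, unsigned long long * workload,
                             unsigned long long threshold,
                             void (*ex_h4h)(int policytbl_ptr)) {
  int nrec = pages[0];
  int j = 1;                           /* slot of record 1's ptr field */
  while((*workload > threshold) && (j < nrec*3)) {
    ex_h4h(pages[j]);                  /* demote this page to hash-for-home */
    *workload -= (unsigned long long)pages[j+1];   /* minus its totalfreq */
    j += 3;
  }
}
#endif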
3182 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3183 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
3184 // Every page is cached on the core that accesses it the most.
3185 // Check to see if any core's pages total more accesses than threshold
3186 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3187 // most remote accesses and hash them for home until we get below
3188 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3189 // Then sort the pages based on activity:
3190 // if more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
3191 // core's pages come from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3192 // then start h4h-ing these pages (selecting the ones with the most remote
3193 // accesses first, i.e. the fewest local accesses) until we get below
3194 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
3195 int cacheAdapt_policy_crowd(){
3196 unsigned int page_index = 0;
3198 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3200 int * tmp_p = gccachepolicytbl+1;
3201 unsigned long long workload[NUMCORESACTIVE];
3202 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3203 unsigned long long total_workload = 0;
3204 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3205 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3206 for(page_index = 0; page_index < page_num; page_index++) {
3207 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3208 bamboo_cache_policy_t policy = {0};
3213 for(int i = 0; i < NUMCORESACTIVE; i++) {
3214 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3215 +size_cachesamplingtbl_local_r*i);
3216 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3219 // check the frequency and decide if this page is hot for this core
3220 if(hotfreq < freq) {
3225 /*if(page_sva == 0x10e90000) {
3226 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3230 // Decide the cache strategy for this page.
3231 // If we decide to adopt a new cache strategy, write it into the shared
3232 // block of the gcsharedsamplingtbl; the memory reserved for recording
3233 // it is large enough to hold the information.
3234 // Format: page start va + cache strategy (h4h, or host core (x,y))
3236 // this page has not been accessed, do not change its cache policy
3239 // locally cache the page on the hottest core
3240 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3241 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3242 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3243 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3244 *tmp_p = page_index;
3246 *tmp_p = policy.word;
3249 workload[hotestcore] += totalfreq;
3250 total_workload += totalfreq;
3251 // record this page in core2heavypages (sorted later via gc_quicksort)
3252 int remoteaccess = totalfreq - hotfreq;
3253 int index = core2heavypages[hotestcore][0];
3254 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3255 core2heavypages[hotestcore][3*index+2] = totalfreq;
3256 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3257 core2heavypages[hotestcore][0]++;
3259 /*if(page_sva == 0x10f10000) {
3261 BLOCKINDEX(page_sva, &block);
3262 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3263 int coord_x = bamboo_cpu2coords[2*coren]+1;
3264 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3265 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
3269 int workload_threshold = total_workload / 10;
3270 // Check the workload of each core
3271 for(int i = 0; i < NUMCORESACTIVE; i++) {
3273 int index = core2heavypages[i][0];
3274 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3275 // sort in descending order of remote accesses
3276 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3277 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3278 // h4h those pages with the most remote accesses
3279 bamboo_cache_policy_t policy = {0};
3280 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3281 *((int*)core2heavypages[i][j]) = policy.word;
3282 workload[i] -= core2heavypages[i][j+1];
3287 // Check if the accesses are crowded onto just a few pages;
3288 // sort in descending order of total accesses
3290 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3291 int threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3292 int num_crowded = 0;
3295 t_workload += core2heavypages[i][j+num_crowded*3+1];
3297 } while(t_workload < threshold);
3298 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
3299 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
3300 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
3302 // need to h4h these pages
3303 // sort the pages in descending order of remote accesses
3304 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3305 //while((num_crowded--) && (j < index*3)) {
3306 // h4h those pages with more remote accesses
3307 bamboo_cache_policy_t policy = {0};
3308 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3309 *((int*)core2heavypages[i][j]) = policy.word;
3310 workload[i] -= core2heavypages[i][j+1];
3311 t_workload -= core2heavypages[i][j+1];
3312 /*if((j/3+GC_CACHE_ADAPT_CROWD_THRESHOLD) < index) {
3314 core2heavypages[i][j+GC_CACHE_ADAPT_CROWD_THRESHOLD*3+1];
3317 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3318 /*if(t_workload <= threshold) {
3322 if((j < index*3) && (t_workload > threshold)) {
3323 num_crowded = ((index-j/3) > GC_CACHE_ADAPT_CROWD_THRESHOLD) ?
3324 (GC_CACHE_ADAPT_CROWD_THRESHOLD) : (index-j/3);*/
3331 } // int cacheAdapt_policy_crowd()
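// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the crowd test
// above: walk the remaining pages (already sorted by total accesses,
// descending) and count how many it takes to cover
// GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the core's workload; if that count
// exceeds GC_CACHE_ADAPT_CROWD_THRESHOLD, the hot set is too spread out and
// pages get hashed for home. The bounds guard is an addition for safety.
#if 0
static int ex_count_crowded(int * pages, int first_rec, int nrec,
                            unsigned long long workload) {
  unsigned long long threshold = (70ULL * workload) / 100;
  unsigned long long t_workload = 0;
  int num_crowded = 0;
  int j = 3*(first_rec-1) + 1;         /* ptr slot of the first record */
  do {
    t_workload += (unsigned long long)pages[j + num_crowded*3 + 1];
    num_crowded++;
  } while((t_workload < threshold) && ((first_rec-1+num_crowded) < nrec));
  return num_crowded;                  /* compare against the crowd limit */
}
#endif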
3333 void cacheAdapt_master() {
3334 #ifdef GC_CACHE_ADAPT
3335 //gc_output_cache_sampling_r();
3336 #endif // GC_CACHE_ADAPT
3338 // check the collected statistics;
3339 // for each page, decide the new cache strategy
3340 numchanged = cacheAdapt_policy_h4h();
3341 //numchanged = cacheAdapt_policy_local();
3342 //numchanged = cacheAdapt_policy_hotest();
3343 //numchanged = cacheAdapt_policy_dominate();
3344 //numchanged = cacheAdapt_policy_overload();
3345 //numchanged = cacheAdapt_policy_crowd();
3346 *gccachepolicytbl = numchanged;
3348 //if(numchanged > 0) tprintf("=================\n");
3351 // adapt the cache strategy for the mutator
3352 void cacheAdapt_mutator() {
3353 int numchanged = *gccachepolicytbl;
3354 // check the changes and adapt them
3355 int * tmp_p = gccachepolicytbl+1;
3356 while(numchanged--) {
3357 // read out the policy
3358 int page_index = *tmp_p;
3359 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3361 /*if(BAMBOO_NUM_OF_CORE == 0) {
3362 tprintf("va: %x, policy: %d (%d,%d) \n",
3363 (int)(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva), policy.cache_mode,
3364 policy.lotar_x, policy.lotar_y);
3367 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3368 policy, BAMBOO_PAGE_SIZE);
3372 //if(BAMBOO_NUM_OF_CORE == 0) tprintf("=================\n"); // TODO
3375 void gc_output_cache_sampling() {
3376 unsigned int page_index = 0;
3378 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3379 for(page_index = 0; page_index < page_num; page_index++) {
3380 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3382 BLOCKINDEX(page_sva, &block);
3383 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3384 tprintf("va: %x page_index: %d host: %d\n",
3385 (int)page_sva, page_index, coren);
3386 for(int i = 0; i < NUMCORESACTIVE; i++) {
3387 int * local_tbl = (int *)((void *)gccachesamplingtbl
3388 +size_cachesamplingtbl_local*i);
3389 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3390 printf("%8d ",freq);
3394 printf("=================\n");
3395 } // gc_output_cache_sampling
3397 void gc_output_cache_sampling_r() {
3398 unsigned int page_index = 0;
3400 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3401 for(page_index = 0; page_index < page_num; page_index++) {
3402 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3404 BLOCKINDEX(page_sva, &block);
3405 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3406 tprintf("va: %x page_index: %d host: %d\n",
3407 (int)page_sva, page_index, coren);
3408 for(int i = 0; i < NUMCORESACTIVE; i++) {
3409 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3410 +size_cachesamplingtbl_local_r*i);
3411 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3412 printf("%8d ",freq);
3416 printf("=================\n");
3417 } // gc_output_cache_sampling_r
3418 #endif // GC_CACHE_ADAPT
3420 inline void gc_collect(struct garbagelist * stackptr) {
3421 // inform the master that this core is at a gc safe point and is ready to do gc
3423 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3424 self_numreceiveobjs, false);
3426 // core collector routine
3428 if(INITPHASE == gcphase) {
3432 #ifdef RAWPATH // TODO GC_DEBUG
3433 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3436 #ifdef GC_CACHE_ADAPT
3437 // prepare for cache adaption:
3438 cacheAdapt_gc(true);
3439 #endif // GC_CACHE_ADAPT
3440 //send init finish msg to core coordinator
3441 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3444 if(MARKPHASE == gcphase) {
3448 #ifdef RAWPATH // TODO GC_DEBUG
3449 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3450 udn_tile_coord_y());
3452 mark(true, stackptr);
3453 #ifdef RAWPATH // TODO GC_DEBUG
3454 printf("(%x,%x) Finish mark phase, start compact phase\n",
3455 udn_tile_coord_x(), udn_tile_coord_y());
3458 #ifdef RAWPATH // TODO GC_DEBUG
3459 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3460 udn_tile_coord_y());
3464 if(MAPPHASE == gcphase) {
3468 #ifdef RAWPATH // TODO GC_DEBUG
3469 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3470 udn_tile_coord_y());
3473 #ifdef RAWPATH // TODO GC_DEBUG
3474 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3475 udn_tile_coord_y());
3479 if(FLUSHPHASE == gcphase) {
3483 #ifdef RAWPATH // TODO GC_DEBUG
3484 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3485 udn_tile_coord_y());
3488 // send the num of obj/liveobj/forwardobj to the startupcore
3489 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3490 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3491 gc_num_liveobj, gc_num_forwardobj, false);
3494 #endif // GC_PROFILE
3496 #ifdef RAWPATH // TODO GC_DEBUG
3497 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3498 udn_tile_coord_y());
3501 #ifdef GC_CACHE_ADAPT
3503 if(PREFINISHPHASE == gcphase) {
3507 #ifdef RAWPATH // TODO GC_DEBUG
3508 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3509 udn_tile_coord_y());
3511 // cache adapt phase
3512 cacheAdapt_mutator();
3513 cacheAdapt_gc(false);
3514 // send prefinish finish msg to core coordinator
3515 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3516 #ifdef RAWPATH // TODO GC_DEBUG
3517 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3518 udn_tile_coord_y());
3520 #endif // GC_CACHE_ADAPT
3523 if(FINISHPHASE == gcphase) {
3527 #ifdef RAWPATH // TODO GC_DEBUG
3528 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3530 } // void gc_collect(struct garbagelist * stackptr)
3532 inline void gc_nocollect(struct garbagelist * stackptr) {
3533 // inform the master that this core is at a gc safe point and is ready to do gc
3535 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3536 self_numreceiveobjs, false);
3539 if(INITPHASE == gcphase) {
3543 #ifdef RAWPATH // TODO GC_DEBUG
3544 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3547 #ifdef GC_CACHE_ADAPT
3548 // prepare for cache adaption:
3549 cacheAdapt_gc(true);
3550 #endif // GC_CACHE_ADAPT
3551 //send init finish msg to core coordinator
3552 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3555 if(MARKPHASE == gcphase) {
3559 #ifdef RAWPATH // TODO GC_DEBUG
3560 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3561 udn_tile_coord_y());
3563 mark(true, stackptr);
3564 #ifdef RAWPATH // TODO GC_DEBUG
3565 printf("(%x,%x) Finish mark phase, wait for flush\n",
3566 udn_tile_coord_x(), udn_tile_coord_y());
3569 // non-gc core collector routine
3571 if(FLUSHPHASE == gcphase) {
3575 #ifdef RAWPATH // TODO GC_DEBUG
3576 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3577 udn_tile_coord_y());
3580 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3581 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3582 gc_num_liveobj, gc_num_forwardobj, false);
3585 #endif // GC_PROFILE
3587 #ifdef RAWPATH // TODO GC_DEBUG
3588 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3589 udn_tile_coord_y());
3592 #ifdef GC_CACHE_ADAPT
3594 if(PREFINISHPHASE == gcphase) {
3598 #ifdef RAWPATH // TODO GC_DEBUG
3599 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3600 udn_tile_coord_y());
3602 // cache adapt phase
3603 cacheAdapt_mutator();
3604 cacheAdapt_gc(false);
3605 // send prefinish finish msg to core coordinator
3606 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3607 #ifdef RAWPATH // TODO GC_DEBUG
3608 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3609 udn_tile_coord_y());
3611 #endif // GC_CACHE_ADAPT
3614 if(FINISHPHASE == gcphase) {
3618 #ifdef RAWPATH // TODO GC_DEBUG
3619 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3621 } // void gc_nocollect(struct garbagelist * stackptr)
3623 inline void gc_master(struct garbagelist * stackptr) {
3625 gcphase = INITPHASE;
3627 waitconfirm = false;
3631 // Note: all cores need to init gc, including non-gc cores
3632 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3633 // send GC init messages to all cores
3634 send_msg_1(i, GCSTARTINIT, false);
3636 bool isfirst = true;
3637 bool allStall = false;
3639 #ifdef GC_CACHE_ADAPT
3640 // prepare for cache adaption:
3641 cacheAdapt_gc(true);
3642 #endif // GC_CACHE_ADAPT
3644 #ifdef RAWPATH // TODO GC_DEBUG
3645 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3646 udn_tile_coord_y());
3649 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3651 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3652 if(gc_checkAllCoreStatus_I()) {
3653 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3656 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3661 #ifdef GC_CACHE_ADAPT
3662 //gc_output_cache_sampling();
3663 #endif // GC_CACHE_ADAPT
3664 #ifdef RAWPATH // TODO GC_DEBUG
3665 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3666 udn_tile_coord_y());
3668 // all cores have finished the init phase;
3669 // restore the gcstatus of all cores
3670 // Note: all cores have to do mark, including non-gc cores
3671 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3672 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3673 gccorestatus[i] = 1;
3674 // send GC start messages to all cores
3675 send_msg_1(i, GCSTART, false);
3678 gcphase = MARKPHASE;
3680 while(MARKPHASE == gcphase) {
3681 mark(isfirst, stackptr);
3688 } // while(MARKPHASE == gcphase)
3689 // send msgs to all cores requesting large obj info
3690 // Note: only need to ask gc cores; non-gc cores do not host any objs
3691 numconfirm = NUMCORES4GC - 1;
3692 for(i = 1; i < NUMCORES4GC; ++i) {
3693 send_msg_1(i, GCLOBJREQUEST, false);
3695 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3700 } // wait for responses
3701 // check the heaptop
3702 if(gcheaptop < gcmarkedptrbound) {
3703 gcheaptop = gcmarkedptrbound;
3708 #ifdef RAWPATH // TODO GC_DEBUG
3709 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3710 udn_tile_coord_y());
3713 // cache all large objs
3715 // not enough space to cache large objs
3716 BAMBOO_EXIT(0xb107);
3718 // predict number of blocks to fill for each core
3720 int numpbc = loadbalance(&tmpheaptop);
3722 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3723 #ifdef RAWPATH // TODO GC_DEBUG
3724 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3725 udn_tile_coord_y());
3728 //int tmptopptr = 0;
3729 //BASEPTR(gctopcore, 0, &tmptopptr);
3731 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3732 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3734 BAMBOO_DEBUGPRINT(0xabab);
3735 BAMBOO_DEBUGPRINT_REG(tmptopptr);
3737 for(i = 0; i < NUMCORES4GC; ++i) {
3739 BASEPTR(i, numpbc, &tmpcoreptr);
3740 //send start compact messages to all cores
3741 // TODO bug here: do not know if the direction is positive or negative
3742 if (tmpcoreptr < tmpheaptop /*tmptopptr*/) {
3743 gcstopblock[i] = numpbc + 1;
3744 if(i != STARTUPCORE) {
3745 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3747 gcblock2fill = numpbc+1;
3748 } // if(i != STARTUPCORE)
3750 gcstopblock[i] = numpbc;
3751 if(i != STARTUPCORE) {
3752 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3754 gcblock2fill = numpbc;
3755 } // if(i != STARTUPCORE)
3758 BAMBOO_DEBUGPRINT(0xf000+i);
3759 BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3760 BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3762 // init some data structures for the compact phase
3764 gcfilledblocks[i] = 0;
3765 gcrequiredmems[i] = 0;
3775 bool finalcompact = false;
3776 // initialize pointers for compacting
3777 struct moveHelper * orig =
3778 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3779 struct moveHelper * to =
3780 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3781 initOrig_Dst(orig, to);
3782 int filledblocks = 0;
3783 INTPTR heaptopptr = 0;
3784 bool finishcompact = false;
3785 bool iscontinue = true;
3786 bool localcompact = true;
3787 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3788 if((!finishcompact) && iscontinue) {
3790 BAMBOO_DEBUGPRINT(0xe001);
3791 BAMBOO_DEBUGPRINT_REG(numpbc);
3792 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3794 finishcompact = compacthelper(orig, to, &filledblocks,
3795 &heaptopptr, &localcompact);
3797 BAMBOO_DEBUGPRINT(0xe002);
3798 BAMBOO_DEBUGPRINT_REG(finishcompact);
3799 BAMBOO_DEBUGPRINT_REG(gctomove);
3800 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3801 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3802 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3806 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3807 if(gc_checkCoreStatus_I()) {
3808 // all cores have finished compacting
3809 // restore the gcstatus of all cores
3810 for(i = 0; i < NUMCORES4GC; ++i) {
3811 gccorestatus[i] = 1;
3813 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3816 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3817 // check if there is spare mem for pending move requests
3818 if(COMPACTPHASE == gcphase) {
3820 BAMBOO_DEBUGPRINT(0xe003);
3822 resolvePendingMoveRequest();
3824 BAMBOO_DEBUGPRINT_REG(gctomove);
3828 BAMBOO_DEBUGPRINT(0xe004);
3832 } // if(gc_checkCoreStatus_I()) else ...
3836 BAMBOO_DEBUGPRINT(0xe005);
3837 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3838 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3839 BAMBOO_DEBUGPRINT_REG(gctomove);
3841 to->ptr = gcmovestartaddr;
3842 to->numblocks = gcblock2fill - 1;
3843 to->bound = (to->numblocks==0) ?
3844 BAMBOO_SMEM_SIZE_L :
3845 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3846 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3847 to->offset = to->ptr - to->base;
3848 to->top = (to->numblocks==0) ?
3849 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3851 to->offset = BAMBOO_CACHE_LINE_SIZE;
3852 to->ptr += to->offset; // for header
3853 to->top += to->offset;
3854 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3855 localcompact = true;
3857 localcompact = false;
3861 } else if(!finishcompact) {
3865 } // while(COMPACTPHASE == gcphase)
3869 #ifdef RAWPATH // TODO GC_DEBUG
3870 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3871 udn_tile_coord_y());
3876 #ifdef RAWPATH // TODO GC_DEBUG
3877 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3878 udn_tile_coord_y());
3886 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3887 // Note: only the gc cores host objs, so only they exchange mapping info
3889 for(i = 1; i < NUMCORES4GC; ++i) {
3890 // send start map messages to the gc cores
3891 gccorestatus[i] = 1;
3892 send_msg_1(i, GCSTARTMAPINFO, false);
3897 #ifdef RAWPATH // TODO GC_DEBUG
3898 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3899 udn_tile_coord_y());
3903 #ifdef RAWPATH // TODO GC_DEBUG
3904 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3905 udn_tile_coord_y());
3907 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3908 while(MAPPHASE == gcphase) {
3909 // check the status of all cores
3910 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3911 if(gc_checkCoreStatus_I()) {
3912 // all cores have finished sending mapping info
3913 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3916 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3917 } // while(MAPPHASE == gcphase)
3919 gcphase = FLUSHPHASE;
3920 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3921 // Note: all cores should flush their runtime data, including non-gc cores
3923 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3924 // send start flush messages to all cores
3925 gccorestatus[i] = 1;
3926 send_msg_1(i, GCSTARTFLUSH, false);
3931 #ifdef RAWPATH // TODO GC_DEBUG
3932 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3933 udn_tile_coord_y());
3937 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3938 while(FLUSHPHASE == gcphase) {
3939 // check the status of all cores
3940 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3941 if(gc_checkAllCoreStatus_I()) {
3942 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3945 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3946 } // while(FLUSHPHASE == gcphase)
3947 #ifdef RAWPATH // TODO GC_DEBUG
3948 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3949 udn_tile_coord_y());
3952 #ifdef GC_CACHE_ADAPT
3953 // now the master core needs to decide the new cache strategy
3954 cacheAdapt_master();
3956 gcphase = PREFINISHPHASE;
3957 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3958 // Note: all cores take part in the prefinish phase, including non-gc cores
3960 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3961 // send start prefinish messages to all cores
3962 gccorestatus[i] = 1;
3963 send_msg_1(i, GCSTARTPREF, false);
3968 #ifdef RAWPATH // TODO GC_DEBUG
3969 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3970 udn_tile_coord_y());
3972 // cache adapt phase
3973 cacheAdapt_mutator();
3974 #ifdef GC_CACHE_ADAPT_OUTPUT
3975 bamboo_output_cache_policy();
3977 cacheAdapt_gc(false);
3978 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3979 while(PREFINISHPHASE == gcphase) {
3980 // check the status of all cores
3981 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3982 if(gc_checkAllCoreStatus_I()) {
3983 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3986 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3987 } // while(PREFINISHPHASE == gcphase)
3988 #endif // GC_CACHE_ADAPT
3990 gcphase = FINISHPHASE;
3992 // invalidate all shared mem pointers
3993 // put it here as it takes time to inform all the other cores to
3994 // finish gc, and it might cause problems if some core resumes the
3995 // mutator earlier than the others
3996 bamboo_cur_msp = NULL;
3997 bamboo_smem_size = 0;
3998 bamboo_smem_zero_top = NULL;
4000 gcprocessing = false;
4005 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4006 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
4007 // send gc finish messages to all cores
4008 send_msg_1(i, GCFINISH, false);
4009 gccorestatus[i] = 1;
4011 #ifdef RAWPATH // TODO GC_DEBUG
4012 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
4013 udn_tile_coord_y());
4016 } // void gc_master(struct garbagelist * stackptr)
4018 inline bool gc(struct garbagelist * stackptr) {
4021 gcprocessing = false;
4025 // core coordinator routine
4026 if(0 == BAMBOO_NUM_OF_CORE) {
4028 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
4029 udn_tile_coord_y());
4031 bool isallstall = true;
4032 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4033 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4035 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
4036 if(gccorestatus[ti] != 0) {
4042 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4043 // some of the cores are still executing the mutator and have not reached
4044 // a gc safe point, so gc is not ready to start;
4045 // in case some pre-gc information msgs were lost, send a confirm
4046 // msg to the 'busy' core
4047 send_msg_1(ti, GCSTARTPRE, false);
4055 //BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4056 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
4057 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
4060 BAMBOO_DEBUGPRINT(0xec04);
4062 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4063 sumsendobj += gcnumsendobjs[0][i];
4065 BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
4067 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4069 BAMBOO_DEBUGPRINT(0xec05);
4070 BAMBOO_DEBUGPRINT_REG(sumsendobj);
4072 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4073 sumsendobj -= gcnumreceiveobjs[0][i];
4075 BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
4077 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4079 BAMBOO_DEBUGPRINT(0xec06);
4080 BAMBOO_DEBUGPRINT_REG(sumsendobj);
4082 if(0 != sumsendobj) {
4083 // there are still some msgs in flight; wait until updated
4084 // pre-gc information arrives and then check again
4086 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4094 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4097 #ifdef RAWPATH // TODO GC_DEBUG
4098 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
4101 // Zero out the remaining bamboo_cur_msp
4102 // Only zero out the first 4 bytes of the remaining memory
4103 // Move the operation here because for the GC_CACHE_ADAPT version,
4104 // we need to make sure the shared heap is not touched during the gcinit
4105 // phase; otherwise, there would be problems when adapting the cache policy
4107 if((bamboo_cur_msp != 0)
4108 && (bamboo_smem_zero_top == bamboo_cur_msp)
4109 && (bamboo_smem_size > 0)) {
4110 *((int *)bamboo_cur_msp) = 0;
4112 #ifdef GC_FLUSH_DTLB
4113 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4114 BAMBOO_CLEAN_DTLB();
4115 gc_num_flush_dtlb++;
4118 #ifdef GC_CACHE_ADAPT
4119 #ifdef GC_CACHE_SAMPLING
4120 // disable the timer interrupt
4121 bamboo_mask_timer_intr();
4122 // get the sampling data
4123 bamboo_output_dtlb_sampling();
4124 #endif // GC_CACHE_SAMPLING
4125 #endif // GC_CACHE_ADAPT
4126 gcprocessing = true;
4127 gc_master(stackptr);
4128 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
4129 // Zero out the remaining bamboo_cur_msp
4130 // Only zero out the first 4 bytes of the remaining memory
4131 // Move the operation here because for the GC_CACHE_ADAPT version,
4132 // we need to make sure the shared heap is not touched during the gcinit
4133 // phase; otherwise, there would be problems when adapting the cache policy
4135 if((bamboo_cur_msp != 0)
4136 && (bamboo_smem_zero_top == bamboo_cur_msp)
4137 && (bamboo_smem_size > 0)) {
4138 *((int *)bamboo_cur_msp) = 0;
4140 #ifdef GC_FLUSH_DTLB
4141 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4142 BAMBOO_CLEAN_DTLB();
4143 gc_num_flush_dtlb++;
4146 #ifdef GC_CACHE_ADAPT
4147 #ifdef GC_CACHE_SAMPLING
4148 // disable the timer interrupt
4149 bamboo_mask_timer_intr();
4150 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4151 // get the sampling data
4152 bamboo_output_dtlb_sampling();
4154 #endif // GC_CACHE_SAMPLING
4155 #endif // GC_CACHE_ADAPT
4156 gcprocessing = true;
4157 gc_collect(stackptr);
4159 // invalidate all shared mem pointers
4160 bamboo_cur_msp = NULL;
4161 bamboo_smem_size = 0;
4162 bamboo_smem_zero_top = NULL;
4164 gcprocessing = false;
4166 // Zero out the remaining bamboo_cur_msp
4167 // Only zero out the first 4 bytes of the remaining memory
4168 // Move the operation here because for the GC_CACHE_ADAPT version,
4169 // we need to make sure the shared heap is not touched during the gcinit
4170 // phase; otherwise, there would be problems when adapting the cache policy
4172 if((bamboo_cur_msp != 0)
4173 && (bamboo_smem_zero_top == bamboo_cur_msp)
4174 && (bamboo_smem_size > 0)) {
4175 *((int *)bamboo_cur_msp) = 0;
4177 #ifdef GC_FLUSH_DTLB
4178 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4179 BAMBOO_CLEAN_DTLB();
4180 gc_num_flush_dtlb++;
4183 #ifdef GC_CACHE_ADAPT
4184 #ifdef GC_CACHE_SAMPLING
4185 // disable the timer interrupt
4186 bamboo_mask_timer_intr();
4187 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4188 // get the sampling data
4189 bamboo_output_dtlb_sampling();
4191 #endif // GC_CACHE_SAMPLING
4192 #endif // GC_CACHE_ADAPT
4193 // not a gc core, should wait for gcfinish msg
4194 gcprocessing = true;
4195 gc_nocollect(stackptr);
4197 // invalidate all shared mem pointers
4198 bamboo_cur_msp = NULL;
4199 bamboo_smem_size = 0;
4200 bamboo_smem_zero_top = NULL;
4202 gcprocessing = false;
4204 #ifdef GC_CACHE_ADAPT
4205 #ifdef GC_CACHE_SAMPLING
4206 // reset the sampling arrays
4207 bamboo_dtlb_sampling_reset();
4208 #endif // GC_CACHE_SAMPLING
4209 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4210 // zero out the gccachesamplingtbl
4211 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4212 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4213 size_cachesamplingtbl_local_r);
4214 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
4215 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4218 #ifdef GC_CACHE_SAMPLING
4219 // enable the timer interrupt
4220 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4221 bamboo_unmask_timer_intr();
4222 #endif // GC_CACHE_SAMPLING
4223 #endif // GC_CACHE_ADAPT
4225 } // bool gc(struct garbagelist * stackptr)
4228 inline void gc_profileStart(void) {
4229 if(!gc_infoOverflow) {
4230 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4231 gc_infoArray[gc_infoIndex] = gcInfo;
4233 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
4237 inline void gc_profileItem(void) {
4238 if(!gc_infoOverflow) {
4239 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4240 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4244 inline void gc_profileEnd(void) {
4245 if(!gc_infoOverflow) {
4246 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4247 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4248 gcInfo->time[gcInfo->index++] = gc_num_livespace;
4249 gcInfo->time[gcInfo->index++] = gc_num_freespace;
4250 gcInfo->time[gcInfo->index++] = gc_num_lobj;
4251 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4252 gcInfo->time[gcInfo->index++] = gc_num_obj;
4253 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4254 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4256 if(gc_infoIndex == GCINFOLENGTH) {
4257 gc_infoOverflow = true;
4258 //taskInfoIndex = 0;
4263 // output the profiling data
4264 void gc_outputProfileData() {
4267 unsigned long long totalgc = 0;
4269 //printf("Start Time, End Time, Duration\n");
4270 // output gc related info
4271 for(i = 0; i < gc_infoIndex; i++) {
4272 GCInfo * gcInfo = gc_infoArray[i];
4273 unsigned long long tmp = 0;
4274 for(j = 0; j < gcInfo->index; j++) {
4275 printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
4276 tmp = gcInfo->time[j];
4278 tmp = (tmp-gcInfo->time[0]);
4279 printf(" ++ %lld \n", tmp);
4283 if(gc_infoOverflow) {
4284 printf("Caution: gc info overflow!\n");
4287 printf("\n\n total gc time: %lld \n", totalgc);
4291 unsigned long long totalgc = 0;
4293 #ifndef BAMBOO_MEMPROF
4294 BAMBOO_DEBUGPRINT(0xdddd);
4296 // output gc related info
4297 for(i= 0; i < gc_infoIndex; i++) {
4298 GCInfo * gcInfo = gc_infoArray[i];
4299 #ifdef BAMBOO_MEMPROF
4300 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4302 unsigned long long tmp = 0;
4303 BAMBOO_DEBUGPRINT(0xddda);
4304 for(j = 0; j < gcInfo->index - 7; j++) {
4305 BAMBOO_DEBUGPRINT(gcInfo->time[j]);
4306 BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
4307 BAMBOO_DEBUGPRINT(0xdddb);
4308 tmp = gcInfo->time[j];
4310 tmp = (tmp-gcInfo->time[0]);
4311 BAMBOO_DEBUGPRINT_REG(tmp);
4312 BAMBOO_DEBUGPRINT(0xdddc);
4313 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]);
4314 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]);
4315 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]);
4316 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]);
4317 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]);
4318 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]);
4319 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]);
4320 BAMBOO_DEBUGPRINT(0xddde);
4324 #ifndef BAMBOO_MEMPROF
4325 BAMBOO_DEBUGPRINT(0xdddf);
4327 BAMBOO_DEBUGPRINT_REG(totalgc);
4329 if(gc_infoOverflow) {
4330 BAMBOO_DEBUGPRINT(0xefee);
4333 #ifndef BAMBOO_MEMPROF
4334 BAMBOO_DEBUGPRINT(0xeeee);
4338 #endif // #ifdef GC_PROFILE
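// ---------------------------------------------------------------------------
// Illustrative sketch (excluded from the build via #if 0) of the GCInfo
// record layout used by the profiling code above: entries 0 .. index-8 are
// BAMBOO_GET_EXE_TIME() stamps taken at phase boundaries, and the final 7
// entries are the livespace, freespace, lobj, lobjspace, obj, liveobj and
// forwardobj counts appended by gc_profileEnd.
#if 0
static unsigned long long ex_gc_duration(unsigned long long * time, int index) {
  /* elapsed time from the first stamp to the last stamp of the collection */
  return time[index - 8] - time[0];
}
#endif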