3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9 #include "GCSharedHash.h"
12 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
13 extern int numqueues[][NUMCLASSES];
15 extern struct genhashtable * activetasks;
16 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
17 extern struct taskparamdescriptor *currtpd;
19 extern struct LockValue runtime_locks[MAXTASKPARAMS];
20 extern int runtime_locklen;
23 extern unsigned int gcmem_mixed_threshold;
24 extern unsigned int gcmem_mixed_usedmem;
// Mark-phase work queue of shared-heap pointers: a singly-linked list of
// fixed-size pointerblock chunks consumed FIFO.
// NOTE(review): the opening of struct pointerblock is missing from this
// extraction; only the trailing 'next' link member is visible here.
29 struct pointerblock *next;
// chunk currently written by gc_enqueue_I
32 struct pointerblock *gchead=NULL;
// chunk currently consumed by the destructive scan (gc_dequeue_I)
34 struct pointerblock *gctail=NULL;
// cursor for the non-destructive scan (gc_dequeue2_I)
36 struct pointerblock *gctail2=NULL;
// one cached free chunk, reused to avoid a malloc/free per block
38 struct pointerblock *gcspare=NULL;
// Large-object queue: doubly-linked list of chunks, each holding up to
// NUMLOBJPTRS (obj ptr, length, host core) triples.  The prev link enables
// the reverse scan used by gc_lobjdequeue3_I.
40 #define NUMLOBJPTRS 20
42 struct lobjpointerblock {
43 void * lobjs[NUMLOBJPTRS];
44 //void * dsts[NUMLOBJPTRS];
45 int lengths[NUMLOBJPTRS];
46 //void * origs[NUMLOBJPTRS];
47 int hosts[NUMLOBJPTRS];
48 struct lobjpointerblock *next;
49 struct lobjpointerblock *prev;
// head chunk/index: enqueue position (gc_lobjenqueue_I)
52 struct lobjpointerblock *gclobjhead=NULL;
53 int gclobjheadindex=0;
// tail chunk/index: destructive dequeue position (gc_lobjdequeue_I)
54 struct lobjpointerblock *gclobjtail=NULL;
55 int gclobjtailindex=0;
// secondary cursor for the non-destructive forward/backward scans
56 struct lobjpointerblock *gclobjtail2=NULL;
57 int gclobjtailindex2=0;
// one cached free chunk, reused to avoid a malloc/free per block
58 struct lobjpointerblock *gclobjspare=NULL;
// Bookkeeping for cache-adaptivity sampling (GC_CACHE_ADAPT builds).
// NOTE(review): interior fields of this struct are missing from this
// extraction -- confirm the full definition against the original file.
// (The misspelled global name 'infomation' is kept: it is an identifier
// referenced elsewhere.)
61 typedef struct gc_cache_revise_info {
62 int orig_page_start_va;
68 int revised_sampling[NUMCORESACTIVE];
69 } gc_cache_revise_info_t;
70 gc_cache_revise_info_t gc_cache_revise_infomation;
71 #endif// GC_CACHE_ADAPT
74 // dump whole mem in blocks
// Debug aid: hex-dump the whole shared memory region, 16 words (64 bytes)
// per printf line, annotating each block with its block/sblock number and
// the owning core's tile coordinates.
// NOTE(review): several interior lines (local declarations, some loop and
// branch lines) are missing from this extraction.
75 inline void dumpSMem() {
83 printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
85 // reserved blocks for sblocktbl
86 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
// dump the reserved region below gcbaseva
88 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
89 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
90 udn_tile_coord_x(), udn_tile_coord_y(),
91 *((int *)(i)), *((int *)(i + 4)),
92 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
93 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
94 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
95 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
96 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
97 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
98 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
100 sblock = gcreservedsb;
101 bool advanceblock = false;
// dump the managed heap itself
103 for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
104 advanceblock = false;
105 // computing sblock # and block #, core coordinate (x,y) also
106 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
108 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
109 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
121 coren = gc_block2core[block%(NUMCORES4GC*2)];
123 // compute core coordinate
124 BAMBOO_COORDS(coren, &x, &y);
125 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
126 udn_tile_coord_x(), udn_tile_coord_y(),
127 block, sblock++, x, y,
128 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
131 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
132 udn_tile_coord_x(), udn_tile_coord_y(),
133 *((int *)(i)), *((int *)(i + 4)),
134 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
135 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
136 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
137 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
138 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
139 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
140 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
142 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
146 // should be invoked with interruption closed
// Append ptr to the mark queue; grows the queue by one pointerblock chunk
// (reusing gcspare when available) when the head chunk is full.
147 inline void gc_enqueue_I(void *ptr) {
149 BAMBOO_DEBUGPRINT(0xe601);
150 BAMBOO_DEBUGPRINT_REG(ptr);
// head chunk full: link in a fresh chunk
152 if (gcheadindex==NUMPTRS) {
153 struct pointerblock * tmp;
158 tmp=RUNMALLOC_I(sizeof(struct pointerblock));
159 } // if (gcspare!=NULL)
163 } // if (gcheadindex==NUMPTRS)
164 gchead->ptrs[gcheadindex++]=ptr;
166 BAMBOO_DEBUGPRINT(0xe602);
168 } // void gc_enqueue_I(void *ptr)
170 // dequeue and destroy the queue
171 inline void * gc_dequeue_I() {
172 if (gctailindex==NUMPTRS) {
173 struct pointerblock *tmp=gctail;
180 } // if (gcspare!=NULL)
181 } // if (gctailindex==NUMPTRS)
182 return gctail->ptrs[gctailindex++];
183 } // void * gc_dequeue()
185 // dequeue and do not destroy the queue
// Non-destructive variant of gc_dequeue_I: walks the chunks via the
// gctail2 cursor without freeing them.
186 inline void * gc_dequeue2_I() {
187 if (gctailindex2==NUMPTRS) {
188 struct pointerblock *tmp=gctail2;
189 gctail2=gctail2->next;
191 } // if (gctailindex2==NUMPTRS)
192 return gctail2->ptrs[gctailindex2++];
193 } // void * gc_dequeue2()
// Non-zero iff the destructive mark-queue cursor has not caught up with
// the enqueue position.
195 inline int gc_moreItems_I() {
196 if ((gchead==gctail)&&(gctailindex==gcheadindex))
199 } // int gc_moreItems()
// Non-zero iff the non-destructive cursor (gctail2) has not caught up with
// the enqueue position.
201 inline int gc_moreItems2_I() {
202 if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
205 } // int gc_moreItems2()
207 // should be invoked with interruption closed
208 // enqueue a large obj: start addr & length
// Append (ptr, length, host) to the large-object queue, growing it by one
// doubly-linked chunk (reusing gclobjspare when available) when full.
209 inline void gc_lobjenqueue_I(void *ptr,
213 BAMBOO_DEBUGPRINT(0xe901);
215 if (gclobjheadindex==NUMLOBJPTRS) {
216 struct lobjpointerblock * tmp;
217 if (gclobjspare!=NULL) {
221 tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
222 } // if (gclobjspare!=NULL)
// link the new chunk in both directions (list is doubly linked)
223 gclobjhead->next=tmp;
224 tmp->prev = gclobjhead;
227 } // if (gclobjheadindex==NUMLOBJPTRS)
228 gclobjhead->lobjs[gclobjheadindex]=ptr;
229 gclobjhead->lengths[gclobjheadindex]=length;
230 gclobjhead->hosts[gclobjheadindex++]=host;
232 BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
233 BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
234 BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
236 } // void gc_lobjenqueue_I(void *ptr...)
238 // dequeue and destroy the queue
// Pop the next large object, writing its length and host core through the
// out-parameters and returning its address; frees (or caches) exhausted
// tail chunks.
239 inline void * gc_lobjdequeue_I(int * length,
241 if (gclobjtailindex==NUMLOBJPTRS) {
242 struct lobjpointerblock *tmp=gclobjtail;
243 gclobjtail=gclobjtail->next;
245 gclobjtail->prev = NULL;
246 if (gclobjspare!=NULL) {
252 } // if (gclobjspare!=NULL)
253 } // if (gclobjtailindex==NUMLOBJPTRS)
255 *length = gclobjtail->lengths[gclobjtailindex];
258 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
260 return gclobjtail->lobjs[gclobjtailindex++];
261 } // void * gc_lobjdequeue()
// Non-zero iff the destructive large-object cursor has not reached the
// enqueue position.
263 inline int gc_lobjmoreItems_I() {
264 if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
267 } // int gc_lobjmoreItems()
269 // dequeue and don't destroy the queue
// Advance the secondary large-object cursor one step forward without
// freeing chunks; returns nothing -- the caller reads the slot directly.
270 inline void gc_lobjdequeue2_I() {
271 if (gclobjtailindex2==NUMLOBJPTRS) {
272 gclobjtail2=gclobjtail2->next;
276 } // if (gclobjtailindex2==NUMLOBJPTRS)
277 } // void * gc_lobjdequeue2()
// Non-zero iff the secondary cursor (gclobjtail2) has not reached the
// enqueue position (forward scan).
279 inline int gc_lobjmoreItems2_I() {
280 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
283 } // int gc_lobjmoreItems2()
285 // 'reversly' dequeue and don't destroy the queue
// Step the secondary cursor one slot BACKWARD (toward the queue tail),
// hopping to the previous chunk when the index underflows.
286 inline void gc_lobjdequeue3_I() {
287 if (gclobjtailindex2==0) {
288 gclobjtail2=gclobjtail2->prev;
289 gclobjtailindex2=NUMLOBJPTRS-1;
292 } // if (gclobjtailindex2==0)
293 } // void * gc_lobjdequeue3()
// Non-zero iff the backward scan (gclobjtail2) has not reached the
// destructive tail position.
295 inline int gc_lobjmoreItems3_I() {
296 if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
299 } // int gc_lobjmoreItems3()
301 inline void gc_lobjqueueinit4_I() {
302 gclobjtail2 = gclobjtail;
303 gclobjtailindex2 = gclobjtailindex;
304 } // void gc_lobjqueueinit2()
// Non-destructive forward dequeue via the secondary cursor: writes length
// and host through the out-parameters and returns the object's address.
306 inline void * gc_lobjdequeue4_I(int * length,
308 if (gclobjtailindex2==NUMLOBJPTRS) {
309 gclobjtail2=gclobjtail2->next;
311 } // if (gclobjtailindex2==NUMLOBJPTRS)
313 *length = gclobjtail2->lengths[gclobjtailindex2];
316 *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
318 return gclobjtail2->lobjs[gclobjtailindex2++];
319 } // void * gc_lobjdequeue4()
// Non-zero iff the type-4 forward scan has not reached the enqueue
// position.
321 inline int gc_lobjmoreItems4_I() {
322 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
325 } // int gc_lobjmoreItems4()
// Current heap bound for this core's compaction run; updated during GC.
327 INTPTR gccurr_heapbound = 0;
// Read the type word at ptr[0] and compute the object's size: plain
// objects use classsize[type]; arrays add sizeof(ArrayObject) plus
// length * element size.  Results are returned via the (not fully
// visible) out-parameters.
329 inline void gettype_size(void * ptr,
332 int type = ((int *)ptr)[0];
334 if(type < NUMCLASSES) {
// regular object: fixed per-class size
336 size = classsize[type];
// array object: header + length * element size
339 struct ArrayObject *ao=(struct ArrayObject *)ptr;
340 int elementsize=classsize[type];
341 int length=ao->___length___;
342 size=sizeof(struct ArrayObject)+length*elementsize;
343 } // if(type < NUMCLASSES)
// Decide whether the object at ptr must be treated as a "large object":
// either it starts exactly at a block boundary, or its size crosses the
// boundary of the block that contains it.  Also returns type/size via
// gettype_size's out-parameters.
348 inline bool isLarge(void * ptr,
352 BAMBOO_DEBUGPRINT(0xe701);
353 BAMBOO_DEBUGPRINT_REG(ptr);
355 // check if a pointer is referring to a large object
356 gettype_size(ptr, ttype, tsize);
358 BAMBOO_DEBUGPRINT(*tsize);
// blocks below BAMBOO_LARGE_SMEM_BOUND use the larger L-size
360 int bound = (BAMBOO_SMEM_SIZE);
361 if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
362 bound = (BAMBOO_SMEM_SIZE_L);
364 if((((int)ptr-gcbaseva)%(bound))==0) {
365 // ptr is a start of a block
367 BAMBOO_DEBUGPRINT(0xe702);
368 BAMBOO_DEBUGPRINT(1);
// object extends past the end of its block
372 if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
373 // it acrosses the boundary of current block
375 BAMBOO_DEBUGPRINT(0xe703);
376 BAMBOO_DEBUGPRINT(1);
381 BAMBOO_DEBUGPRINT(0);
384 } // bool isLarge(void * ptr, int * ttype, int * tsize)
// Map a shared-heap address to the core that owns it (via RESIDECORE).
386 inline int hostcore(void * ptr) {
387 // check the host core of ptr
389 RESIDECORE(ptr, &host);
391 BAMBOO_DEBUGPRINT(0xedd0);
392 BAMBOO_DEBUGPRINT_REG(ptr);
393 BAMBOO_DEBUGPRINT_REG(host);
396 } // int hostcore(void * ptr)
// Translate a logical core number into its (x, y) tile coordinates using
// the bamboo_cpu2coords lookup table (two entries per core).
398 inline void cpu2coords(int coren,
401 *x = bamboo_cpu2coords[2*coren];
402 *y = bamboo_cpu2coords[2*coren+1];
403 } // void cpu2coords(...)
405 inline bool isLocal(void * ptr) {
406 // check if a pointer is in shared heap on this core
407 return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
408 } // bool isLocal(void * ptr)
// True iff every GC core (0..NUMCORES4GC-1) has reported status 0
// ("stalled") in gccorestatus.
410 inline bool gc_checkCoreStatus_I() {
411 bool allStall = true;
412 for(int i = 0; i < NUMCORES4GC; ++i) {
413 if(gccorestatus[i] != 0) {
416 } // if(gccorestatus[i] != 0)
417 } // for(i = 0; i < NUMCORES4GC; ++i)
// Same as gc_checkCoreStatus_I but over all active cores
// (0..NUMCORESACTIVE-1), not just the GC cores.
421 inline bool gc_checkAllCoreStatus_I() {
422 bool allStall = true;
423 for(int i = 0; i < NUMCORESACTIVE; ++i) {
424 if(gccorestatus[i] != 0) {
427 } // if(gccorestatus[i] != 0)
428 } // for(i = 0; i < NUMCORESACTIVE; ++i)
// STARTUPCORE-side termination detection for the mark phase: when all
// cores stall, it double-samples each core's send/receive object counters
// (two entries selected by gcnumsrobjs_index); only when the totals
// balance AND the two samples agree does it advance gcphase to
// COMPACTPHASE.  Otherwise it flips the sample index and retries.
// (Function name keeps the original's 'Statue' typo -- it is the public
// identifier used by callers.)
432 inline void checkMarkStatue() {
434 BAMBOO_DEBUGPRINT(0xee01);
438 (waitconfirm && (numconfirm == 0))) {
440 BAMBOO_DEBUGPRINT(0xee02);
// pick which of the two sample entries to record into
445 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
448 entry_index = gcnumsrobjs_index;
450 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
451 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
452 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
453 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
454 // check the status of all cores
455 bool allStall = gc_checkAllCoreStatus_I();
457 BAMBOO_DEBUGPRINT(0xee03);
461 BAMBOO_DEBUGPRINT(0xee04);
466 BAMBOO_DEBUGPRINT(0xee05);
468 // the first time found all cores stall
469 // send out status confirm msg to all other cores
470 // reset the corestatus array too
471 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
473 numconfirm = NUMCORESACTIVE - 1;
474 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
475 for(i = 1; i < NUMCORESACTIVE; ++i) {
477 // send mark phase finish confirm request msg to core i
478 send_msg_1(i, GCMARKCONFIRM, false);
479 } // for(i = 1; i < NUMCORESACTIVE; ++i)
482 // check if the sum of send objs and receive obj are the same
483 // yes->check if the info is the latest; no->go on executing
485 for(i = 0; i < NUMCORESACTIVE; ++i) {
486 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
487 } // for(i = 0; i < NUMCORESACTIVE; ++i)
489 BAMBOO_DEBUGPRINT(0xee06);
490 BAMBOO_DEBUGPRINT_REG(sumsendobj);
492 for(i = 0; i < NUMCORESACTIVE; ++i) {
493 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
494 } // for(i = 0; i < NUMCORESACTIVE; ++i)
496 BAMBOO_DEBUGPRINT(0xee07);
497 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// balanced: every sent object was received somewhere
499 if(0 == sumsendobj) {
500 // Check if there are changes of the numsendobjs or numreceiveobjs on
502 bool ischanged = false;
503 for(i = 0; i < NUMCORESACTIVE; ++i) {
504 if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
505 (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
509 } // for(i = 0; i < NUMCORESACTIVE; ++i)
511 BAMBOO_DEBUGPRINT(0xee08);
512 BAMBOO_DEBUGPRINT_REG(ischanged);
516 BAMBOO_DEBUGPRINT(0xee09);
518 // all the core status info are the latest
// mark phase confirmed finished: move the collector forward
520 gcphase = COMPACTPHASE;
521 // restore the gcstatus for all cores
522 for(i = 0; i < NUMCORESACTIVE; ++i) {
524 } // for(i = 0; i < NUMCORESACTIVE; ++i)
527 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
530 // There were changes between phase 1 and phase 2, can not decide
531 // whether the mark phase has been finished
533 // As it fails in phase 2, flip the entries
534 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
535 } // if(0 == sumsendobj) else ...
536 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
537 } // if(!gcwaitconfirm) else()
539 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
541 } // if((!waitconfirm)...
543 BAMBOO_DEBUGPRINT(0xee0a);
545 } // void checkMarkStatue()
// Pre-GC barrier: ask every active core for a status confirmation, wait
// for all replies, and only report "safe to start GC" once the global
// sent-minus-received object count is zero (i.e., no transfer-object
// messages are still in flight).
547 inline bool preGC() {
548 // preparation for gc
549 // make sure to clear all incoming msgs espacially transfer obj msgs
551 BAMBOO_DEBUGPRINT(0xec01);
555 (waitconfirm && (numconfirm == 0))) {
556 // send out status confirm msgs to all cores to check if there are
557 // transfer obj msgs on-the-fly
559 numconfirm = NUMCORESACTIVE - 1;
560 for(i = 1; i < NUMCORESACTIVE; ++i) {
562 // send status confirm msg to core i
563 send_msg_1(i, STATUSCONFIRM, false);
564 } // for(i = 1; i < NUMCORESACTIVE; ++i)
567 BAMBOO_DEBUGPRINT(0xec02);
// busy-wait until every core has replied
570 if(numconfirm == 0) {
573 } // wait for confirmations
577 BAMBOO_DEBUGPRINT(0xec03);
579 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
580 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
583 BAMBOO_DEBUGPRINT(0xec04);
585 for(i = 0; i < NUMCORESACTIVE; ++i) {
586 sumsendobj += numsendobjs[i];
588 BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
590 } // for(i = 1; i < NUMCORESACTIVE; ++i)
592 BAMBOO_DEBUGPRINT(0xec05);
593 BAMBOO_DEBUGPRINT_REG(sumsendobj);
595 for(i = 0; i < NUMCORESACTIVE; ++i) {
596 sumsendobj -= numreceiveobjs[i];
598 BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
600 } // for(i = 1; i < NUMCORESACTIVE; ++i)
602 BAMBOO_DEBUGPRINT(0xec06);
603 BAMBOO_DEBUGPRINT_REG(sumsendobj);
605 if(0 == sumsendobj) {
608 // still have some transfer obj msgs on-the-fly, can not start gc
610 } // if(0 == sumsendobj)
613 BAMBOO_DEBUGPRINT(0xec07);
615 // previously asked for status confirmation and do not have all the
616 // confirmations yet, can not start gc
618 } // if((!waitconfirm) ||
// Reset all per-collection state: the STARTUPCORE clears every core's
// status/counter tables; every core then resets its own counters, the
// mark queue, the large-object queue, and the pointer-mapping hash
// tables, and zeroes the GC profiling accumulators.
621 inline void initGC() {
623 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
624 for(i = 0; i < NUMCORES4GC; ++i) {
626 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
627 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
629 gcrequiredmems[i] = 0;
630 gcfilledblocks[i] = 0;
632 } // for(i = 0; i < NUMCORES4GC; ++i)
633 for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
635 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
636 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
641 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
642 gcself_numsendobjs = 0;
643 gcself_numreceiveobjs = 0;
644 gcmarkedptrbound = 0;
647 //gcismapped = false;
// reset the mark queue (allocate the first chunk lazily)
658 gcheadindex=gctailindex=gctailindex2 = 0;
659 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
661 gctailindex = gctailindex2 = gcheadindex;
662 gctail = gctail2 = gchead;
665 // initialize the large obj queues
666 if (gclobjhead==NULL) {
669 gclobjtailindex2 = 0;
670 gclobjhead=gclobjtail=gclobjtail2=
671 RUNMALLOC(sizeof(struct lobjpointerblock));
673 gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
674 gclobjtail = gclobjtail2 = gclobjhead;
676 gclobjhead->next = gclobjhead->prev = NULL;
678 #ifdef LOCALHASHTBL_TEST
679 freeRuntimeHash(gcpointertbl);
680 gcpointertbl = allocateRuntimeHash(20);
682 mgchashreset(gcpointertbl);
684 //gcpointertbl = allocateMGCHash(20);
686 freeMGCHash(gcforwardobjtbl);
687 gcforwardobjtbl = allocateMGCHash(20, 3);
689 // initialize the mapping info related structures
690 if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
691 // Never free the shared hash table, just reset it
692 /*freeGCSharedHash(gcsharedptbl);
693 gcsharedptbl = allocateGCSharedHash(20);*/
694 mgcsharedhashReset(gcsharedptbl);
696 // Zero out the remaining bamboo_cur_msp
697 // Only zero out the first 4 bytes of the remaining memory
698 /*if((bamboo_cur_msp != 0)
699 && (bamboo_smem_zero_top == bamboo_cur_msp)
700 && (bamboo_smem_size > 0)) {
701 *((int *)bamboo_cur_msp) = 0;
// reset the GC profiling counters
704 gc_num_livespace = 0;
705 gc_num_freespace = 0;
707 gc_num_lobjspace = 0;
709 gc_num_forwardobj = 0;
710 gc_num_profiles = NUMCORESACTIVE - 1;
714 // compute load balance for all cores
// Sum all cores' gcloads to find the post-compaction heap top, convert it
// to a block index, and derive the number of blocks each GC core should
// receive plus the top core (via RESIDECORE).  heaptop is an out-param.
715 inline int loadbalance(int * heaptop) {
716 // compute load balance
719 // get the total loads
720 int tloads = gcloads[STARTUPCORE];
721 for(i = 1; i < NUMCORES4GC; i++) {
722 tloads += gcloads[i];
724 *heaptop = gcbaseva + tloads;
727 BAMBOO_DEBUGPRINT(0xdddd);
728 BAMBOO_DEBUGPRINT_REG(tloads);
729 BAMBOO_DEBUGPRINT_REG(*heaptop);
732 BLOCKINDEX(*heaptop, &b);
733 int numbpc = b / NUMCORES4GC; // num of blocks per core
735 BAMBOO_DEBUGPRINT_REG(b);
736 BAMBOO_DEBUGPRINT_REG(numbpc);
739 RESIDECORE(heaptop, &gctopcore);
741 BAMBOO_DEBUGPRINT_REG(gctopcore);
744 } // void loadbalance(int * heaptop)
// Stage all recorded large objects at the very top of the shared heap:
// first insertion-sorts the queue by address (ascending) while summing the
// total size, then -- if the top-of-heap region does not collide with
// gcheaptop -- copies each object downward from the heap top, marking each
// as COMPACTED.  Returns false when there is not enough room.
746 inline bool cacheLObjs() {
747 // check the total mem size need for large objs
748 unsigned long long sumsize = 0;
751 BAMBOO_DEBUGPRINT(0xe801);
753 gclobjtail2 = gclobjtail;
754 gclobjtailindex2 = gclobjtailindex;
758 // compute total mem size required and sort the lobjs in ascending order
759 while(gc_lobjmoreItems2_I()) {
761 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
762 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
763 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
769 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
770 BAMBOO_DEBUGPRINT_REG(tmp_len);
771 BAMBOO_DEBUGPRINT_REG(sumsize);
// insertion sort: shift larger entries up until tmp_lobj's slot is found
773 int i = gclobjtailindex2-1;
774 struct lobjpointerblock * tmp_block = gclobjtail2;
775 // find the place to insert
778 if(tmp_block->prev == NULL) {
781 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
782 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
783 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
784 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
785 tmp_block = tmp_block->prev;
789 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
791 if(tmp_block->lobjs[i-1] > tmp_lobj) {
792 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
793 tmp_block->lengths[i] = tmp_block->lengths[i-1];
794 tmp_block->hosts[i] = tmp_block->hosts[i-1];
798 } // if(tmp_block->lobjs[i-1] < tmp_lobj)
799 } // if(i ==0 ) else {}
// drop the entry into its sorted slot (skip if already in place)
802 if(i != gclobjtailindex2 - 1) {
803 tmp_block->lobjs[i] = tmp_lobj;
804 tmp_block->lengths[i] = tmp_len;
805 tmp_block->hosts[i] = tmp_host;
807 } // while(gc_lobjmoreItems2())
810 gc_num_lobjspace = sumsize;
812 // check if there are enough space to cache these large objs
813 INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
814 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
815 // do not have enough room to cache large objs
817 BAMBOO_DEBUGPRINT(0xe802);
818 BAMBOO_DEBUGPRINT_REG(dst);
819 BAMBOO_DEBUGPRINT_REG(gcheaptop);
820 BAMBOO_DEBUGPRINT_REG(sumsize);
825 BAMBOO_DEBUGPRINT(0xe803);
826 BAMBOO_DEBUGPRINT_REG(dst);
827 BAMBOO_DEBUGPRINT_REG(gcheaptop);
830 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
831 // cache the largeObjs to the top of the shared heap
832 //gclobjtail2 = gclobjtail;
833 //gclobjtailindex2 = gclobjtailindex;
834 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
// reverse scan: copy objects from highest address downward
835 while(gc_lobjmoreItems3_I()) {
837 size = gclobjtail2->lengths[gclobjtailindex2];
838 // set the mark field to , indicating that this obj has been moved
839 // and need to be flushed
840 ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
// use memmove when source and destination may overlap
842 if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
843 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
845 //BAMBOO_WRITE_HINT_CACHE(dst, size);
846 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
849 BAMBOO_DEBUGPRINT(0x804);
850 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
851 BAMBOO_DEBUGPRINT(dst);
852 BAMBOO_DEBUGPRINT_REG(size);
853 BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
854 BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
858 } // void cacheLObjs()
860 // update the bmmboo_smemtbl to record current shared mem usage
// Record how much of each block owned by core `coren` is used, up to
// localtop: full blocks get their full size, the block containing
// localtop gets the partial load, and (in mixed-mem builds) the global
// used-memory counter is bumped accordingly.
861 void updateSmemTbl(int coren,
864 int bound = BAMBOO_SMEM_SIZE_L;
865 BLOCKINDEX(localtop, &ltopcore);
866 if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
867 bound = BAMBOO_SMEM_SIZE;
869 int load = (localtop-gcbaseva)%bound;
// walk this core's blocks in ownership order (gc_core2block layout)
874 toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
875 if(toset < ltopcore) {
876 bamboo_smemtbl[toset]=
877 (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
879 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
881 } else if(toset == ltopcore) {
882 bamboo_smemtbl[toset] = load;
884 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
896 } // void updateSmemTbl(int, int)
// Final large-object placement: after compaction, normalize each core's
// gcloads to the true heap top, rebuild bamboo_smemtbl, then move the
// large objects cached at the heap top (by cacheLObjs) down to just above
// the live data -- writing block headers, padding with -2 filler, updating
// gcsbstarttbl/bamboo_smemtbl, recording old->new address mappings in
// gcpointertbl, and notifying each object's host core via GCLOBJMAPPING.
// NOTE(review): many interior lines are missing from this extraction; the
// visible text is not the complete function body.
898 inline void moveLObjs() {
900 BAMBOO_DEBUGPRINT(0xea01);
903 // update the gcmem_mixed_usedmem
904 gcmem_mixed_usedmem = 0;
906 // zero out the smemtbl
907 BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
908 // find current heap top
909 // flush all gcloads to indicate the real heap top on one core
910 // previous it represents the next available ptr on a core
911 if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
912 && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
913 // edge of a block, check if this is exactly the heaptop
914 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
915 gcloads[0]+=(gcfilledblocks[0]>1 ?
916 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
918 updateSmemTbl(0, gcloads[0]);
920 BAMBOO_DEBUGPRINT(0xea02);
921 BAMBOO_DEBUGPRINT_REG(gcloads[0]);
922 BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
// repeat the normalization for the remaining GC cores
924 for(int i = 1; i < NUMCORES4GC; i++) {
927 BAMBOO_DEBUGPRINT(0xf000+i);
928 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
929 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
931 if((gcfilledblocks[i] > 0)
932 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
933 // edge of a block, check if this is exactly the heaptop
934 BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
936 (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
939 updateSmemTbl(i, gcloads[i]);
941 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
943 } // for(int i = 1; i < NUMCORES4GC; i++) {
945 // find current heap top
947 // a bug here: when using local allocation, directly move large objects
948 // to the highest free chunk might not be memory efficient
// scan downward for the highest non-empty block
953 for(i = gcnumblock-1; i >= 0; i--) {
954 if(bamboo_smemtbl[i] > 0) {
959 tmpheaptop = gcbaseva;
961 tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
962 (BAMBOO_SMEM_SIZE_L*i) :
963 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
966 // move large objs from gcheaptop to tmpheaptop
967 // write the header first
968 unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -gcheaptop;
970 gcmem_mixed_usedmem += tomove;
973 BAMBOO_DEBUGPRINT(0xea03);
974 BAMBOO_DEBUGPRINT_REG(tomove);
975 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
976 BAMBOO_DEBUGPRINT_REG(gcheaptop);
978 // flush the sbstartbl
979 BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
980 (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
982 gcheaptop = tmpheaptop;
984 // check how many blocks it acrosses
985 int remain = tmpheaptop-gcbaseva;
986 int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb;//number of the sblock
987 int b = 0; // number of the block
988 BLOCKINDEX(tmpheaptop, &b);
989 // check the remaining space in this block
990 bound = (BAMBOO_SMEM_SIZE);
991 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
992 bound = (BAMBOO_SMEM_SIZE_L);
994 remain = bound - remain%bound;
997 BAMBOO_DEBUGPRINT(0xea04);
// reserve room for the block header at the start of the run
1003 int base = tmpheaptop;
1005 remain -= BAMBOO_CACHE_LINE_SIZE;
1006 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1007 gc_lobjqueueinit4_I();
1008 while(gc_lobjmoreItems4_I()) {
1009 ptr = (int)(gc_lobjdequeue4_I(&size, &host));
1010 ALIGNSIZE(size, &isize);
1011 if(remain < isize) {
1012 // this object acrosses blocks
1014 // close current block, fill its header
1015 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1016 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1017 bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;//add the size of header
1021 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1022 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1024 remain -= BAMBOO_CACHE_LINE_SIZE;
1025 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1026 BLOCKINDEX(tmpheaptop, &b);
1027 sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
1028 } // if(cpysize > 0)
1030 // move the large obj
// memmove when the cached copy overlaps the destination
1031 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1032 memmove(tmpheaptop, gcheaptop, size);
1034 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1035 memcpy(tmpheaptop, gcheaptop, size);
1037 // fill the remaining space with -2 padding
1038 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1040 BAMBOO_DEBUGPRINT(0xea05);
1041 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1042 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1043 BAMBOO_DEBUGPRINT_REG(size);
1044 BAMBOO_DEBUGPRINT_REG(isize);
1045 BAMBOO_DEBUGPRINT_REG(base);
1048 // cache the mapping info anyway
1049 //if(ptr != tmpheaptop) {
1050 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1051 #ifdef LOCALHASHTBL_TEST
1052 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1054 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1056 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1057 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1060 BAMBOO_DEBUGPRINT(0xcdca);
1061 BAMBOO_DEBUGPRINT_REG(ptr);
1062 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1064 if(host != BAMBOO_NUM_OF_CORE) {
1065 // send the original host core with the mapping info
1066 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1068 BAMBOO_DEBUGPRINT(0xcdcb);
1069 BAMBOO_DEBUGPRINT_REG(ptr);
1070 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1072 } // if(host != BAMBOO_NUM_OF_CORE)
1073 tmpheaptop += isize;
1075 // set the gcsbstarttbl and bamboo_smemtbl
1076 int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
// interior sblocks spanned by this object carry no object start
1077 for(int k = 1; k < tmpsbs; k++) {
1078 gcsbstarttbl[sb+k] = (INTPTR)(-1);
1081 bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1082 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
1083 for(; b < tmpsbs; b++) {
1084 bamboo_smemtbl[b] = bound;
1085 if(b==NUMCORES4GC-1) {
1086 bound = BAMBOO_SMEM_SIZE;
1089 if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
1090 gcsbstarttbl[sb] = (INTPTR)(-1);
1091 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1092 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1093 bamboo_smemtbl[b] = bound;
1095 gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
1096 remain = tmpheaptop-gcbaseva;
1097 bamboo_smemtbl[b] = remain%bound;
1098 remain = bound - bamboo_smemtbl[b];
1099 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
1101 // close current block and fill the header
1102 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1103 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
1106 if(remain == BAMBOO_CACHE_LINE_SIZE) {
1107 // fill with 0 in case
1108 BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
1110 remain -= BAMBOO_CACHE_LINE_SIZE;
1111 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1114 // move the large obj
1115 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1116 memmove(tmpheaptop, gcheaptop, size);
1118 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1119 memcpy(tmpheaptop, gcheaptop, size);
1121 // fill the remaining space with -2 padding
1122 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1124 BAMBOO_DEBUGPRINT(0xea06);
1125 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1126 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1127 BAMBOO_DEBUGPRINT_REG(size);
1128 BAMBOO_DEBUGPRINT_REG(isize);
1133 // cache the mapping info anyway
1134 //if(ptr != tmpheaptop) {
1135 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1136 #ifdef LOCALHASHTBL_TEST
1137 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1139 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1141 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1142 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1145 BAMBOO_DEBUGPRINT(0xcdcc);
1146 BAMBOO_DEBUGPRINT_REG(ptr);
1147 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1148 BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
1150 if(host != BAMBOO_NUM_OF_CORE) {
1151 // send the original host core with the mapping info
1152 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1154 BAMBOO_DEBUGPRINT(0xcdcd);
1155 BAMBOO_DEBUGPRINT_REG(ptr);
1156 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1158 } // if(host != BAMBOO_NUM_OF_CORE)
1159 tmpheaptop += isize;
1161 // update bamboo_smemtbl
1162 bamboo_smemtbl[b] += isize;
1163 } // if(remain < isize) else ...
1164 } // while(gc_lobjmoreItems())
1166 // close current block, fill the header
1167 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1168 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1169 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;// add the size of the header
1171 tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
1173 gcheaptop = tmpheaptop;
1175 } // if(tomove == 0)
1178 BAMBOO_DEBUGPRINT(0xea07);
1179 BAMBOO_DEBUGPRINT_REG(gcheaptop);
// locate the first partition that is not completely full
1182 bamboo_free_block = 0;
1185 tbound = (bamboo_free_block<NUMCORES4GC) ?
1186 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1187 if(bamboo_smemtbl[bamboo_free_block] == tbound) {
1188 bamboo_free_block++;
1190 // the first non-full partition
1196 // check how many live space there are
1197 gc_num_livespace = 0;
1198 for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
1199 gc_num_livespace += bamboo_smemtbl[tmpi];
1201 gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
1204 BAMBOO_DEBUGPRINT(0xea08);
1205 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1207 } // void moveLObjs()
// Mark a single object reference.  Local shared objects are flagged
// DISCOVERED (word [6] of the header) and pushed on the mark queue exactly
// once; remote shared objects trigger a GCMARKEDOBJ message to the owning
// core, de-duplicated through gcforwardobjtbl.  NULL and non-shared
// pointers are handled by the (partially visible) early paths.
1209 inline void markObj(void * objptr) {
1210 if(objptr == NULL) {
1213 if(ISSHAREDOBJ(objptr)) {
1214 int host = hostcore(objptr);
1215 if(BAMBOO_NUM_OF_CORE == host) {
1217 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1218 if(((int *)objptr)[6] == INIT) {
1219 // this is the first time that this object is discovered,
1220 // set the flag as DISCOVERED
1221 ((int *)objptr)[6] |= DISCOVERED;
1222 BAMBOO_CACHE_FLUSH_LINE(objptr);
1223 gc_enqueue_I(objptr);
1225 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1228 BAMBOO_DEBUGPRINT(0xbbbb);
1229 BAMBOO_DEBUGPRINT_REG(host);
1230 BAMBOO_DEBUGPRINT_REG(objptr);
1232 // check if this obj has been forwarded
1233 if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
1234 // send a msg to host informing that objptr is active
1235 send_msg_2(host, GCMARKEDOBJ, objptr, /*BAMBOO_NUM_OF_CORE,*/ false);
1237 gc_num_forwardobj++;
1238 #endif // GC_PROFILE
1239 gcself_numsendobjs++;
1240 MGCHashadd(gcforwardobjtbl, (int)objptr);
1244 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1245 gc_enqueue_I(objptr);
1246 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1247 } // if(ISSHAREDOBJ(objptr))
1248 } // void markObj(void * objptr)
1250 // enqueue root objs
// Seed the mark phase with every GC root reachable from this core: the
// garbage-list stack, the per-class object queues, the current and active
// task descriptors, cached in/out transfer-object queues, and lock-table
// entries.  Aborts (BAMBOO_EXIT) if called outside MARKPHASE.
1251 inline void tomark(struct garbagelist * stackptr) {
1252 if(MARKPHASE != gcphase) {
1254 BAMBOO_DEBUGPRINT_REG(gcphase);
1256 BAMBOO_EXIT(0xb101);
1258 gcbusystatus = true;
1262 // enqueue current stack
1263 while(stackptr!=NULL) {
1265 BAMBOO_DEBUGPRINT(0xe501);
1266 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1267 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1268 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1270 for(i=0; i<stackptr->size; i++) {
1271 if(stackptr->array[i] != NULL) {
1272 markObj(stackptr->array[i]);
1275 stackptr=stackptr->next;
1279 BAMBOO_DEBUGPRINT(0xe503);
1281 // enqueue objectsets
1282 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
1283 for(i=0; i<NUMCLASSES; i++) {
1284 struct parameterwrapper ** queues =
1285 objectqueues[BAMBOO_NUM_OF_CORE][i];
1286 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1287 for(j = 0; j < length; ++j) {
1288 struct parameterwrapper * parameter = queues[j];
1289 struct ObjectHash * set=parameter->objectset;
1290 struct ObjectNode * ptr=set->listhead;
1292 markObj((void *)ptr->key);
1299 // euqueue current task descriptor
1300 if(currtpd != NULL) {
1302 BAMBOO_DEBUGPRINT(0xe504);
1304 for(i=0; i<currtpd->numParameters; i++) {
1305 markObj(currtpd->parameterArray[i]);
1310 BAMBOO_DEBUGPRINT(0xe505);
1312 // euqueue active tasks
1313 if(activetasks != NULL) {
1314 struct genpointerlist * ptr=activetasks->list;
1316 struct taskparamdescriptor *tpd=ptr->src;
1318 for(i=0; i<tpd->numParameters; i++) {
1319 markObj(tpd->parameterArray[i]);
1326 BAMBOO_DEBUGPRINT(0xe506);
1328 // enqueue cached transferred obj
1329 struct QueueItem * tmpobjptr = getHead(&objqueue);
1330 while(tmpobjptr != NULL) {
1331 struct transObjInfo * objInfo =
1332 (struct transObjInfo *)(tmpobjptr->objectptr);
1333 markObj(objInfo->objptr);
1334 tmpobjptr = getNextQueueItem(tmpobjptr);
1338 BAMBOO_DEBUGPRINT(0xe507);
1340 // enqueue cached objs to be transferred
1341 struct QueueItem * item = getHead(totransobjqueue);
1342 while(item != NULL) {
1343 struct transObjInfo * totransobj =
1344 (struct transObjInfo *)(item->objectptr);
1345 markObj(totransobj->objptr);
1346 item = getNextQueueItem(item);
1347 } // while(item != NULL)
1350 BAMBOO_DEBUGPRINT(0xe508);
1352 // enqueue lock related info
1353 for(i = 0; i < runtime_locklen; ++i) {
1354 markObj((void *)(runtime_locks[i].redirectlock));
1355 if(runtime_locks[i].value != NULL) {
1356 markObj((void *)(runtime_locks[i].value));
1360 } // void tomark(struct garbagelist * stackptr)
// mark: per-core mark-phase driver. Drains the local mark queue, marks
// local objects (setting MARKED, clearing DISCOVERED in header word [6]),
// routes large objects to the lobj queue, accumulates gccurr_heaptop
// (aligned live bytes on this core) and gcmarkedptrbound (highest live
// address), then scans each object's pointer fields via pointerarray[type].
// When done, reports GCFINISHMARK (or updates coordinator state directly
// if running on STARTUPCORE).
// NOTE(review): lines are elided in this extraction (numbering gaps), so
// several braces/declarations (i, j, type, size, isize) are not visible.
1362 inline void mark(bool isfirst,
1363 struct garbagelist * stackptr) {
1365 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
1369 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
1371 // enqueue root objs
1373 gccurr_heaptop = 0; // record the size of all active objs in this core
1374 // aligned but does not consider block boundaries
1375 gcmarkedptrbound = 0;
1378 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);
1381 bool checkfield = true;
1382 bool sendStall = false;
// main mark loop: keep draining until the coordinator flips gcphase
1384 while(MARKPHASE == gcphase) {
1386 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
// queue inspection must happen with interrupts off (runtime mode)
1389 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1390 bool hasItems = gc_moreItems2_I();
1391 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1393 BAMBOO_DEBUGPRINT(0xed05);
1399 gcbusystatus = true;
1401 void * ptr = gc_dequeue2_I();
1404 BAMBOO_DEBUGPRINT_REG(ptr);
1409 // check if it is a shared obj
1410 if(ISSHAREDOBJ(ptr)) {
1411 // a shared obj, check if it is a local obj on this core
1412 int host = hostcore(ptr);
1413 bool islocal = (host == BAMBOO_NUM_OF_CORE);
// DISCOVERED bit set in header word [6] == enqueued but not yet marked
1415 bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
1416 if(isLarge(ptr, &type, &size) && isnotmarked) {
1417 // ptr is a large object and not marked or enqueued
1419 BAMBOO_DEBUGPRINT(0xecec);
1420 BAMBOO_DEBUGPRINT_REG(ptr);
1421 BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
1423 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
// large objs are compacted separately: hand off to the lobj queue
1424 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1426 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// precedence note: & binds tighter than |, i.e. (hdr & ~DISCOVERED) | MARKED
1428 ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
1429 BAMBOO_CACHE_FLUSH_LINE(ptr);
1430 } else if(isnotmarked) {
1431 // ptr is an unmarked active object on this core
1432 ALIGNSIZE(size, &isize);
1433 gccurr_heaptop += isize;
1435 BAMBOO_DEBUGPRINT(0xaaaa);
1436 BAMBOO_DEBUGPRINT_REG(ptr);
1437 BAMBOO_DEBUGPRINT_REG(isize);
1438 BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1441 ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
1442 BAMBOO_CACHE_FLUSH_LINE(ptr);
// track the highest live address; compaction stops at this bound
1444 if(ptr + size > gcmarkedptrbound) {
1445 gcmarkedptrbound = ptr + size;
1446 } // if(ptr + size > gcmarkedptrbound)
1448 // ptr is not an active obj or has been marked
1450 } // if(isLarge(ptr, &type, &size)) else ...
1451 } /* can never reach here
1454 if(BAMBOO_NUM_OF_CORE == 0) {
1455 BAMBOO_DEBUGPRINT(0xbbbb);
1456 BAMBOO_DEBUGPRINT_REG(host);
1457 BAMBOO_DEBUGPRINT_REG(ptr);
1460 // check if this obj has been forwarded
1461 if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
1462 // send a msg to host informing that ptr is active
1463 send_msg_2(host, GCMARKEDOBJ, ptr, false);
1464 gcself_numsendobjs++;
1465 MGCHashadd(gcforwardobjtbl, (int)ptr);
1468 }// if(isLocal(ptr)) else ...*/
1469 } // if(ISSHAREDOBJ(ptr))
1471 BAMBOO_DEBUGPRINT(0xed06);
1475 // scan all pointers in ptr
// pointerarray[type]: 0 => no pointers; 1 => array of pointers;
// otherwise a table whose [0] is the field count, [1..] are offsets
1476 unsigned INTPTR * pointer;
1477 pointer=pointerarray[type];
1479 /* Array of primitives */
1481 } else if (((INTPTR)pointer)==1) {
1482 /* Array of pointers */
1483 struct ArrayObject *ao=(struct ArrayObject *) ptr;
1484 int length=ao->___length___;
1486 for(j=0; j<length; j++) {
// element j lives right after the ___length___ field
1488 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1492 INTPTR size=pointer[0];
1494 for(i=1; i<=size; i++) {
1495 unsigned int offset=pointer[i];
1496 void * objptr=*((void **)(((char *)ptr)+offset));
1499 } // if (pointer==0) else if ... else ...
1501 } // while(gc_moreItems2())
1503 BAMBOO_DEBUGPRINT(0xed07);
1505 gcbusystatus = false;
1506 // send mark finish msg to core coordinator
1507 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1509 BAMBOO_DEBUGPRINT(0xed08);
// coordinator core records its own completion in the shared arrays
1511 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1512 gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
1513 gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
1514 gcself_numreceiveobjs;
1515 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1519 BAMBOO_DEBUGPRINT(0xed09);
1521 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1522 gcself_numsendobjs, gcself_numreceiveobjs, false);
1525 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1527 BAMBOO_DEBUGPRINT(0xed0a);
1530 if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1532 BAMBOO_DEBUGPRINT(0xed0b);
1536 } // while(MARKPHASE == gcphase)
// compact2Heaptophelper_I: (interrupts-closed helper) grant core `coren`'s
// pending memory request out of the current heap-top core's space.
// If the top core is the requester itself, set the global move parameters
// directly; otherwise send a GCMOVESTART message. If the request fits in
// the remaining space of the current top block, consume it; otherwise
// advance the top core to its next block (NEXTTOPCORE) and leave the
// request partially satisfied. `p`, `numblocks`, `remain` are in/out
// cursors describing the current heap-top position.
// NOTE(review): parameter lines and some braces are elided in this view.
1541 inline void compact2Heaptophelper_I(int coren,
1546 int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1547 if(STARTUPCORE == coren) {
// requester is this (coordinator) core: no message needed
1549 gcmovestartaddr = *p;
1550 gcdstcore = gctopcore;
1551 gcblock2fill = *numblocks + 1;
1553 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
1556 BAMBOO_DEBUGPRINT_REG(coren);
1557 BAMBOO_DEBUGPRINT_REG(gctopcore);
1558 BAMBOO_DEBUGPRINT_REG(*p);
1559 BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1561 if(memneed < *remain) {
1563 BAMBOO_DEBUGPRINT(0xd104);
// request fully satisfied from the current top block
1566 gcrequiredmems[coren] = 0;
1567 gcloads[gctopcore] += memneed;
1568 *remain = *remain - memneed;
1571 BAMBOO_DEBUGPRINT(0xd105);
1573 // next available block: current top block exhausted
1575 gcfilledblocks[gctopcore] += 1;
1577 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1578 gcloads[gctopcore] = newbase;
// remaining need shrinks by what was granted (minus the header line)
1579 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1580 gcstopblock[gctopcore]++;
1581 gctopcore = NEXTTOPCORE(gctopblock);
1583 *numblocks = gcstopblock[gctopcore];
1584 *p = gcloads[gctopcore];
// first NUMCORES4GC blocks are the large "L" blocks, the rest are small
1586 *remain=(b<NUMCORES4GC) ?
1587 ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1588 : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1590 BAMBOO_DEBUGPRINT(0xd106);
1591 BAMBOO_DEBUGPRINT_REG(gctopcore);
1592 BAMBOO_DEBUGPRINT_REG(*p);
1593 BAMBOO_DEBUGPRINT_REG(b);
1594 BAMBOO_DEBUGPRINT_REG(*remain);
1596 } // if(memneed < remain)
1598 } // void compact2Heaptophelper_I(int, int*, int*, int*)
// compact2Heaptop: coordinator-side pass run when no core has spare memory
// but some cores are blocked with pending move requests. Locates the
// current heap top (gctopcore/gcloads) and directs blocked cores to move
// their remaining data there, serving the top core's own request first.
// All shared-state inspection/mutation is bracketed by runtime-mode
// (interrupt-disabled) sections.
// NOTE(review): declarations of p and b are elided in this extraction.
1600 inline void compact2Heaptop() {
1601 // no cores with spare mem and some cores are blocked with pending move
1602 // find the current heap top and make them move to the heap top
1604 int numblocks = gcfilledblocks[gctopcore];
1605 //BASEPTR(gctopcore, numblocks, &p);
1606 p = gcloads[gctopcore];
// space left in the top core's current block (L-blocks sized differently)
1609 int remain = (b<NUMCORES4GC) ?
1610 ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1611 : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1612 // check if the top core finishes
1613 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1614 if(gccorestatus[gctopcore] != 0) {
1616 BAMBOO_DEBUGPRINT(0xd101);
1617 BAMBOO_DEBUGPRINT_REG(gctopcore);
1619 // let the top core finishes its own work first
1620 compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
1621 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1624 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1627 BAMBOO_DEBUGPRINT(0xd102);
1628 BAMBOO_DEBUGPRINT_REG(gctopcore);
1629 BAMBOO_DEBUGPRINT_REG(p);
1630 BAMBOO_DEBUGPRINT_REG(b);
1631 BAMBOO_DEBUGPRINT_REG(remain);
// serve every other core that is blocked with an outstanding request
1633 for(int i = 0; i < NUMCORES4GC; i++) {
1634 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1635 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1637 BAMBOO_DEBUGPRINT(0xd103);
1639 compact2Heaptophelper_I(i, &p, &numblocks, &remain);
1640 if(gccorestatus[gctopcore] != 0) {
1642 BAMBOO_DEBUGPRINT(0xd101);
1643 BAMBOO_DEBUGPRINT_REG(gctopcore);
1645 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1646 // the top core is not free now
1649 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1650 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1651 } // for(i = 0; i < NUMCORES4GC; i++)
1653 BAMBOO_DEBUGPRINT(0xd106);
1655 } // void compact2Heaptop()
// resolvePendingMoveRequest: coordinator-side matchmaking between cores
// that finished compaction with spare memory (gcfilledblocks < gcstopblock)
// and cores blocked waiting for destination memory (gcrequiredmems > 0).
// Pairs sourcecore/dstcore via assignSpareMem_I, notifying the destination
// with GCMOVESTART (or setting globals when the destination is this core).
// If nothing is running and every core is blocked, escalates to
// SUBTLECOMPACTPHASE so compact2Heaptop can take over.
// NOTE(review): the i/j scan bodies are partially elided in this view.
1657 inline void resolvePendingMoveRequest() {
1659 BAMBOO_DEBUGPRINT(0xeb01);
1662 BAMBOO_DEBUGPRINT(0xeeee);
1663 for(int k = 0; k < NUMCORES4GC; k++) {
1664 BAMBOO_DEBUGPRINT(0xf000+k);
1665 BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1666 BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1667 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1668 BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1670 BAMBOO_DEBUGPRINT(0xffff);
1674 bool nosparemem = true;
1675 bool haspending = false;
1676 bool hasrunning = false;
1677 bool noblock = false;
1678 int dstcore = 0; // the core who need spare mem
1679 int sourcecore = 0; // the core who has spare mem
// two cursors: i scans for spare-memory cores, j scans for pending cores
1680 for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
1682 // check if there are cores with spare mem
1683 if(gccorestatus[i] == 0) {
1684 // finished working, check if it still have spare mem
1685 if(gcfilledblocks[i] < gcstopblock[i]) {
1686 // still have spare mem
1689 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1694 if(gccorestatus[j] != 0) {
1695 // not finished, check if it has pending move requests
// blocked == filled all its assigned blocks AND still needs memory
1696 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1701 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1702 } // if(gccorestatus[i] == 0) else ...
1704 } // if(!haspending)
1705 if(!nosparemem && haspending) {
// found a (source, destination) pair: carve space with interrupts off
1709 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1710 gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
1711 gcrequiredmems[dstcore],
1714 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1716 BAMBOO_DEBUGPRINT(0xeb02);
1717 BAMBOO_DEBUGPRINT_REG(sourcecore);
1718 BAMBOO_DEBUGPRINT_REG(dstcore);
1719 BAMBOO_DEBUGPRINT_REG(startaddr);
1720 BAMBOO_DEBUGPRINT_REG(tomove);
1722 if(STARTUPCORE == dstcore) {
1724 BAMBOO_DEBUGPRINT(0xeb03);
// destination is this core: set the move parameters directly
1726 gcdstcore = sourcecore;
1728 gcmovestartaddr = startaddr;
1729 gcblock2fill = tomove;
1732 BAMBOO_DEBUGPRINT(0xeb04);
1734 send_msg_4(dstcore, GCMOVESTART, sourcecore,
1735 startaddr, tomove, false);
1742 } // for(i = 0; i < NUMCORES4GC; i++)
1744 BAMBOO_DEBUGPRINT(0xcccc);
1745 BAMBOO_DEBUGPRINT_REG(hasrunning);
1746 BAMBOO_DEBUGPRINT_REG(haspending);
1747 BAMBOO_DEBUGPRINT_REG(noblock);
// deadlock-ish state: nobody running, everybody blocked -> subtle compact
1750 if(!hasrunning && !noblock) {
1751 gcphase = SUBTLECOMPACTPHASE;
1755 } // void resovePendingMoveRequest()
// Fields of struct moveHelper: a cursor over the shared heap used by the
// compaction routines (nextSBlock/initOrig_Dst/nextBlock/moveobj). Two
// instances are used: "orig" walks live data, "to" tracks the copy target.
// NOTE(review): the "struct moveHelper {" opening line is elided in this
// extraction; only the field list and closing brace are visible.
1758 int numblocks; // block num for heap
1759 INTPTR base; // base virtual address of current heap block
1760 INTPTR ptr; // virtual address of current heap top
1761 int offset; // offset in current heap block
1762 int blockbase; // virtual address of current small block to check
1763 int blockbound; // bound virtual address of current small block
1764 int sblockindex; // index of the small blocks
1765 int top; // real size of current heap block to check
1766 int bound; // bound size of current heap block to check
1767 }; // struct moveHelper
1769 // If out of boundary of valid shared memory, return false, else return true
// nextSBlock: advance the "orig" cursor to the next small block (sblock)
// containing data to scan. Handles crossing big-block boundaries (via
// BASEPTR and bamboo_smemtbl for the real fill bound), skipping sblocks
// reserved for large objects (gcsbstarttbl[i] == -1), and sblocks whose
// usable data starts past the beginning (gcsbstarttbl[i] != 0).
// Returns false when the cursor runs past gcbaseva+BAMBOO_SHARED_MEM_SIZE
// (no more data), true otherwise.
// NOTE(review): label lines (innernextSBlock/outernextSBlock), some braces
// and return statements are elided in this extraction.
1770 inline bool nextSBlock(struct moveHelper * orig) {
1771 orig->blockbase = orig->blockbound;
1772 bool sbchanged = false;
1774 BAMBOO_DEBUGPRINT(0xecc0);
1775 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1776 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1777 BAMBOO_DEBUGPRINT_REG(orig->bound);
1778 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1781 // check if across a big block
1782 // TODO now do not zero out the whole memory, maybe the last two conditions
// a zero word at the cursor means "no data here" (memory not zeroed wholesale)
1784 if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
1785 || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
1786 || ((*((int*)orig->blockbase))==0)) {
1788 // end of current heap block, jump to next one
1791 BAMBOO_DEBUGPRINT(0xecc1);
1792 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1794 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1796 BAMBOO_DEBUGPRINT(orig->base);
// past the end of shared memory: nothing left to scan
1798 if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
1800 orig->ptr = orig->base; // set current ptr to out of boundary too
1803 //orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1804 orig->blockbase = orig->base;
1805 orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
1808 BLOCKINDEX(orig->base, &blocknum);
// empty big block: skip it entirely
1809 if(bamboo_smemtbl[blocknum] == 0) {
1811 goto innernextSBlock;
1813 // check the bamboo_smemtbl to decide the real bound
1814 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1815 } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
// crossed an sblock boundary within the same big block
1816 orig->sblockindex += 1;
1818 } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1820 // check if this sblock should be skipped or have special start point
1821 if(gcsbstarttbl[orig->sblockindex] == -1) {
// -1 marks an sblock fully occupied by a large object: skip it
1824 BAMBOO_DEBUGPRINT(0xecc2);
1826 orig->sblockindex += 1;
1827 orig->blockbase += BAMBOO_SMEM_SIZE;
1828 goto outernextSBlock;
1829 } else if((gcsbstarttbl[orig->sblockindex] != 0)
1831 // the first time to access this SBlock
1833 BAMBOO_DEBUGPRINT(0xecc3);
1835 // not start from the very beginning
1836 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1837 } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1839 // setup information for this sblock
// first word of an sblock holds its used-size; skip the header line
1840 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1841 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1842 orig->ptr = orig->blockbase + orig->offset;
1844 BAMBOO_DEBUGPRINT(0xecc4);
1845 BAMBOO_DEBUGPRINT_REG(orig->base);
1846 BAMBOO_DEBUGPRINT_REG(orig->bound);
1847 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1848 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1849 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1850 BAMBOO_DEBUGPRINT_REG(orig->offset);
1852 if(orig->ptr >= orig->bound) {
1853 // met a lobj, move to next block
1854 goto innernextSBlock;
1858 } // bool nextSBlock(struct moveHelper * orig)
1860 // return false if there are no available data to compact
// initOrig_Dst: initialize the compaction cursors for this core. "to"
// starts at the core's first block base plus one cache-line header; "orig"
// starts at the same base and is positioned on the first scannable sblock
// (delegating to nextSBlock when the first sblock belongs to a large
// object). Under GC_CACHE_ADAPT also seeds the per-page sampling revision
// state. Returns false when there is no data to compact.
// NOTE(review): lines are elided in this extraction (e.g. the assignment
// target before line 1907 and the final return are not visible).
1861 inline bool initOrig_Dst(struct moveHelper * orig,
1862 struct moveHelper * to) {
// destination starts at block 0 of this core, after the block header line
1865 to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1866 to->bound = BAMBOO_SMEM_SIZE_L;
1867 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1870 BAMBOO_DEBUGPRINT(0xef01);
1871 BAMBOO_DEBUGPRINT_REG(to->base);
1873 to->ptr = to->base + to->offset;
1874 #ifdef GC_CACHE_ADAPT
1875 // initialize the gc_cache_revise_information
1876 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1877 gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
1878 ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
1879 gc_cache_revise_infomation.to_page_index =
1880 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
1881 gc_cache_revise_infomation.orig_page_start_va = -1;
1882 #endif // GC_CACHE_ADAPT
1884 // init the orig ptr
1885 orig->numblocks = 0;
1886 orig->base = to->base;
1888 BLOCKINDEX(orig->base, &blocknum);
1889 // check the bamboo_smemtbl to decide the real bound
1890 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1891 orig->blockbase = orig->base;
1892 orig->sblockindex = (orig->base - gcbaseva) / BAMBOO_SMEM_SIZE;
1894 BAMBOO_DEBUGPRINT(0xef02);
1895 BAMBOO_DEBUGPRINT_REG(orig->base);
1896 BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1897 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1898 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1901 if(gcsbstarttbl[orig->sblockindex] == -1) {
1903 BAMBOO_DEBUGPRINT(0xef03);
// first sblock holds a large object: jump past it and find the next one
1907 gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1908 return nextSBlock(orig);
1909 } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1911 BAMBOO_DEBUGPRINT(0xef04);
// usable data in the first sblock starts past its beginning
1913 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1916 BAMBOO_DEBUGPRINT(0xef05);
// sblock header word holds its used-size; position past the header line
1918 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1919 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1920 orig->ptr = orig->blockbase + orig->offset;
1922 BAMBOO_DEBUGPRINT(0xef06);
1923 BAMBOO_DEBUGPRINT_REG(orig->base);
1927 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
// nextBlock: advance the "to" (destination) cursor to this core's next
// heap block, reserving one cache line at the block start for the block
// header and resetting ptr/offset accordingly.
// NOTE(review): a line is elided here between 1931 and 1933 (embedded
// numbering skips 1932) — presumably the numblocks increment that BASEPTR
// relies on; confirm against the full source before editing.
1929 inline void nextBlock(struct moveHelper * to) {
1930 to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1931 to->bound += BAMBOO_SMEM_SIZE;
1933 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1934 to->offset = BAMBOO_CACHE_LINE_SIZE;
1935 to->ptr = to->base + to->offset;
1936 } // void nextBlock(struct moveHelper * to)
1938 // endaddr does not contain spaces for headers
// moveobj: compact one object. Skips -2 filler bytes, advances across
// sblock boundaries via nextSBlock, decodes the object's type/size (scalar
// via classsize[] or array via element size * length), and, if the object
// header has MARKED set, slides/copies it to the "to" cursor (memmove when
// the ranges may overlap, memcpy otherwise), records the old->new address
// in gcpointertbl (and gcsharedptbl when present), and stamps the old
// header COMPACTED. Rolls the destination to the next block when the
// current one fills, stopping when stopblock is reached. Under
// GC_CACHE_ADAPT, folds per-page access-sampling counts from the source
// page into the destination page whenever either cursor crosses a page.
// NOTE(review): many lines are elided in this extraction (returns, some
// braces, declarations of type/size/isize/mark) — numbering gaps show it.
1939 inline bool moveobj(struct moveHelper * orig,
1940 struct moveHelper * to,
1942 if(stopblock == 0) {
1947 BAMBOO_DEBUGPRINT(0xe201);
1948 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 BAMBOO_DEBUGPRINT_REG(to->ptr);
// skip -2 filler bytes left by a previous compaction pass
1957 while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1958 orig->ptr = (int*)(orig->ptr) + 1;
1960 #ifdef GC_CACHE_ADAPT
1961 if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
1962 // end of an orig page
1963 // compute the impact of this page for the new page
1964 int tmp_factor = to->ptr-gc_cache_revise_infomation.to_page_start_va;
1965 int topage=gc_cache_revise_infomation.to_page_index;
1966 int oldpage = gc_cache_revise_infomation.orig_page_index;
1967 int * newtable=&gccachesamplingtbl_r[topage];
1968 int * oldtable=&gccachesamplingtbl[oldpage];
// fold each core's sampling count for the old page into the new page
1970 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
1971 (*newtable) += (*oldtable)*tmp_factor;
1972 newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
1973 oldtable=(int*)(((char *)oldtable)+size_cachesamplingtbl_local);
1975 // prepare for an new orig page
1976 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
1977 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
1978 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
1979 (BAMBOO_PAGE_SIZE)*(tmp_index+1);
1980 gc_cache_revise_infomation.orig_page_index = tmp_index;
1981 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1984 if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
1985 if(!nextSBlock(orig)) {
1986 // finished, no more data
1992 BAMBOO_DEBUGPRINT(0xe202);
1993 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1994 BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
1996 // check the obj's type, size and mark flag
1997 type = ((int *)(orig->ptr))[0];
2000 // end of this block, go to next one
2001 if(!nextSBlock(orig)) {
2002 // finished, no more data
2006 } else if(type < NUMCLASSES) {
// scalar object: fixed size from the class table
2008 size = classsize[type];
2011 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
2012 int elementsize=classsize[type];
2013 int length=ao->___length___;
2014 size=sizeof(struct ArrayObject)+length*elementsize;
2016 mark = ((int *)(orig->ptr))[6];
2017 bool isremote = ((((int *)(orig->ptr))[6] & REMOTEM) != 0);
2019 BAMBOO_DEBUGPRINT(0xe203);
2020 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2021 BAMBOO_DEBUGPRINT_REG(size);
2023 ALIGNSIZE(size, &isize); // no matter is the obj marked or not
2024 // should be able to across it
2025 if((mark & MARKED) != 0) {
2027 BAMBOO_DEBUGPRINT(0xe204);
2032 // marked obj, copy it to current heap top
2033 // check to see if remaining space is enough
2034 if(to->top + isize > to->bound) {
2035 // fill 0 indicating the end of this block
2036 BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top);
2037 // fill the header of this block and then go to next block
2038 to->offset += to->bound - to->top;
2039 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
// block header word records the total used offset within the block
2040 (*((int*)(to->base))) = to->offset;
2041 #ifdef GC_CACHE_ADAPT
2042 int tmp_ptr = to->ptr;
2043 #endif // GC_CACHE_ADAPT
2045 #ifdef GC_CACHE_ADAPT
2046 if((to->ptr) >= gc_cache_revise_infomation.to_page_end_va) {
2047 // end of an to page, wrap up its information
2048 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2049 int topage=gc_cache_revise_infomation.to_page_index;
2050 int oldpage = gc_cache_revise_infomation.orig_page_index;
2051 int * newtable=&gccachesamplingtbl_r[topage];
2052 int * oldtable=&gccachesamplingtbl[oldpage];
2054 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2055 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2056 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2057 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2059 // prepare for an new to page
2060 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2061 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2062 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2063 (BAMBOO_PAGE_SIZE)*(tmp_index+1);
2064 gc_cache_revise_infomation.orig_page_index = tmp_index;
2065 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2066 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2067 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2068 gc_cache_revise_infomation.to_page_index =
2069 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2071 #endif // GC_CACHE_ADAPT
2072 if(stopblock == to->numblocks) {
2073 // already fulfilled the block
2075 } // if(stopblock == to->numblocks)
2076 } // if(to->top + isize > to->bound)
2077 // set the mark field to 2, indicating that this obj has been moved
2078 // and need to be flushed
2079 ((int *)(orig->ptr))[6] = COMPACTED;
2080 if(to->ptr != orig->ptr) {
// overlapping slide must use memmove; disjoint copy can use memcpy
2081 if((int)(orig->ptr) < (int)(to->ptr)+size) {
2082 memmove(to->ptr, orig->ptr, size);
2084 //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
2085 memcpy(to->ptr, orig->ptr, size);
2087 // fill the remaining space with -2
// alignment padding marker: -2 bytes are skipped on later scans
2088 BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
2090 // store mapping info
2091 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2092 #ifdef LOCALHASHTBL_TEST
2093 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2095 mgchashInsert_I(gcpointertbl, orig->ptr, to->ptr);
2097 //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2099 // add to the sharedptbl
2100 if(gcsharedptbl != NULL) {
2101 //GCSharedHashadd_I(gcsharedptbl, orig->ptr, to->ptr);
2102 mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
2105 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2108 BAMBOO_DEBUGPRINT(0xcdce);
2109 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2110 BAMBOO_DEBUGPRINT_REG(to->ptr);
2111 BAMBOO_DEBUGPRINT_REG(isize);
// live bytes remaining to be compacted on this core
2113 gccurr_heaptop -= isize;
2115 to->offset += isize;
2117 #ifdef GC_CACHE_ADAPT
2118 int tmp_ptr = to->ptr;
2119 #endif // GC_CACHE_ADAPT
2120 if(to->top == to->bound) {
2121 // fill the header of this block and then go to next block
2122 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2123 (*((int*)(to->base))) = to->offset;
2126 #ifdef GC_CACHE_ADAPT
2127 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2128 // end of an to page, wrap up its information
2129 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2130 int topage=gc_cache_revise_infomation.to_page_index;
2131 int oldpage = gc_cache_revise_infomation.orig_page_index;
2132 int * newtable=&gccachesamplingtbl_r[topage];
2133 int * oldtable=&gccachesamplingtbl[oldpage];
2135 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2136 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2137 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2138 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2140 // prepare for an new to page
2141 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2142 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2143 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2144 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2145 gc_cache_revise_infomation.orig_page_index =
2146 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2147 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2148 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2149 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2150 gc_cache_revise_infomation.to_page_index =
2151 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2153 #endif // GC_CACHE_ADAPT
2156 #ifdef GC_CACHE_ADAPT
2157 if((to->ptr) >= gc_cache_revise_infomation.to_page_end_va) {
2158 // end of an to page, wrap up its information
2159 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2160 int topage=gc_cache_revise_infomation.to_page_index;
2161 int oldpage = gc_cache_revise_infomation.orig_page_index;
2162 int * newtable=&gccachesamplingtbl_r[topage];
2163 int * oldtable=&gccachesamplingtbl[oldpage];
2165 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2166 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2167 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2168 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2170 // prepare for an new to page
2171 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2172 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2173 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2174 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2175 gc_cache_revise_infomation.orig_page_index =
2176 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2177 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2178 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2179 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2180 gc_cache_revise_infomation.to_page_index =
2181 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2183 #endif // GC_CACHE_ADAPT
2186 BAMBOO_DEBUGPRINT(0xe205);
2192 BAMBOO_DEBUGPRINT_REG(isize);
2193 BAMBOO_DEBUGPRINT_REG(size);
2194 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2195 BAMBOO_DEBUGPRINT_REG(orig->bound);
2197 if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
2199 BAMBOO_DEBUGPRINT(0xe206);
2201 if(!nextSBlock(orig)) {
2202 // finished, no more data
2207 BAMBOO_DEBUGPRINT(0xe207);
2208 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2211 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
2213 // should be invoked with interrupt closed
// assignSpareMem_I: (interrupts-closed) carve `requiredmem` bytes (plus a
// cache-line header) out of `sourcecore`'s spare space. Writes the grant's
// start address to *startaddr and the destination block count to *tomove.
// If the request fits inside the source's current block, bumps gcloads;
// otherwise advances the source to its next block. Returns the number of
// bytes still unsatisfied (<= 0 when fully satisfied).
// NOTE(review): the remaining parameter lines, a return, and the newbase
// declaration are elided in this extraction.
2214 inline int assignSpareMem_I(int sourcecore,
2219 BLOCKINDEX(gcloads[sourcecore], &b);
// end address of the block currently being filled on sourcecore
2220 int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
2221 : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2222 int remain = boundptr - gcloads[sourcecore];
2223 int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2224 *startaddr = gcloads[sourcecore];
2225 *tomove = gcfilledblocks[sourcecore] + 1;
2226 if(memneed < remain) {
2227 gcloads[sourcecore] += memneed;
2230 // next available block
2231 gcfilledblocks[sourcecore] += 1;
2233 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2234 gcloads[sourcecore] = newbase;
// partially satisfied: caller keeps the remainder as a pending request
2235 return requiredmem-remain;
2237 } // int assignSpareMem_I(int ,int * , int * , int * )
2239 // should be invoked with interrupt closed
// gcfindSpareMem_I: (interrupts-closed) look for a finished core with
// spare blocks and delegate to assignSpareMem_I to satisfy `requiredmem`
// for `requiredcore`. If no donor is available now, records the request in
// gcrequiredmems[] so resolvePendingMoveRequest can serve it later.
// NOTE(review): remaining parameter lines and returns are elided in this
// extraction (visible from the embedded numbering gaps).
2240 inline bool gcfindSpareMem_I(int * startaddr,
2245 for(int k = 0; k < NUMCORES4GC; k++) {
// donor = finished (status 0) with unfilled assigned blocks
2246 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2247 // check if this stopped core has enough mem
2248 assignSpareMem_I(k, requiredmem, tomove, startaddr);
2253 // if can not find spare mem right now, hold the request
2254 gcrequiredmems[requiredcore] = requiredmem;
2257 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
// compacthelper: run one round of compaction with the given orig/to
// cursors: moveobj until either all marked data (up to gcmarkedptrbound)
// is moved or the assigned gcblock2fill quota is filled. Finalizes the
// last destination block header, publishes to->ptr / to->numblocks via
// the out-parameters, then reports completion: the coordinator core
// updates shared state and may request more space via gcfindSpareMem_I;
// other cores send GCFINISHCOMPACT. On receiving new move parameters
// (gcmovestartaddr/gcblock2fill/gcdstcore) it re-aims the "to" cursor,
// setting *localcompact according to whether the destination is local.
// NOTE(review): waiting loops, returns and several braces are elided in
// this extraction (embedded numbering gaps).
2259 inline bool compacthelper(struct moveHelper * orig,
2260 struct moveHelper * to,
2263 bool * localcompact) {
2264 // scan over all objs in this block, compact the marked objs
2265 // loop stop when finishing either scanning all active objs or
2266 // fulfilled the gcstopblock
2268 BAMBOO_DEBUGPRINT(0xe101);
2269 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2270 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2273 while(orig->ptr < gcmarkedptrbound) {
2274 bool stop = moveobj(orig, to, gcblock2fill);
2279 #ifdef GC_CACHE_ADAPT
2280 // end of an to page, wrap up its information
2281 int tmp_factor = to->ptr-gc_cache_revise_infomation.to_page_start_va;
2282 int topage=gc_cache_revise_infomation.to_page_index;
2283 int oldpage = gc_cache_revise_infomation.orig_page_index;
2284 int * newtable=&gccachesamplingtbl_r[topage];
2285 int * oldtable=&gccachesamplingtbl[oldpage];
2287 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2288 (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
2289 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2290 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2292 #endif // GC_CACHE_ADAPT
2293 // if no objs have been compact, do nothing,
2294 // otherwise, fill the header of this block
2295 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2296 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2297 (*((int*)(to->base))) = to->offset;
// nothing was placed in this block: give back the reserved header line
2301 to->top -= BAMBOO_CACHE_LINE_SIZE;
2302 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2304 *heaptopptr = to->ptr;
2305 *filledblocks = to->numblocks;
2308 BAMBOO_DEBUGPRINT(0xe102);
2309 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2310 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2311 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2312 BAMBOO_DEBUGPRINT_REG(*filledblocks);
2313 BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2316 // send msgs to core coordinator indicating that the compact is finishing
2317 // send compact finish message to core coordinator
2318 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
// coordinator: record own results directly in the shared arrays
2319 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2320 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2321 if(orig->ptr < gcmarkedptrbound) {
2323 BAMBOO_DEBUGPRINT(0xe103);
// quota filled but data remains: ask for more destination space
2327 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2328 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2329 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2331 BAMBOO_DEBUGPRINT(0xe104);
2335 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2337 BAMBOO_DEBUGPRINT(0xe105);
2341 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2344 BAMBOO_DEBUGPRINT(0xe106);
2346 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2351 if(orig->ptr < gcmarkedptrbound) {
2353 BAMBOO_DEBUGPRINT(0xe107);
// non-coordinator, not finished: report partial completion
2357 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2358 *filledblocks, *heaptopptr, gccurr_heaptop, false);
2361 BAMBOO_DEBUGPRINT(0xe108);
2362 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2364 // finish compacting
2365 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2366 *filledblocks, *heaptopptr, 0, false);
2368 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2370 if(orig->ptr < gcmarkedptrbound) {
2372 BAMBOO_DEBUGPRINT(0xe109);
2374 // still have unpacked obj
2383 BAMBOO_DEBUGPRINT(0xe10a);
// re-aim the destination cursor at the newly granted space
2386 to->ptr = gcmovestartaddr;
2387 to->numblocks = gcblock2fill - 1;
2388 to->bound = (to->numblocks==0) ?
2389 BAMBOO_SMEM_SIZE_L :
2390 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2391 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2392 to->offset = to->ptr - to->base;
2393 to->top = (to->numblocks==0) ?
2394 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2396 to->offset = BAMBOO_CACHE_LINE_SIZE;
2397 to->ptr += to->offset; // for header
2398 to->top += to->offset;
2399 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2400 *localcompact = true;
2402 *localcompact = false;
2404 #ifdef GC_CACHE_ADAPT
2405 // initialize the gc_cache_revise_information
2406 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2407 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2408 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2409 gc_cache_revise_infomation.to_page_index =
2410 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2411 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2412 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2413 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2414 gc_cache_revise_infomation.orig_page_index =
2415 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2416 #endif // GC_CACHE_ADAPT
2420 BAMBOO_DEBUGPRINT(0xe10b);
2423 } // void compacthelper()
// compact: per-core entry point of the compact phase. Verifies the phase
// (abort 0xb102 otherwise), allocates orig/to moveHelper cursors, and
// either reports GCFINISHCOMPACT immediately when there is nothing to
// compact (initOrig_Dst returned false) or seeds the GC_CACHE_ADAPT page
// state and runs compacthelper.
// NOTE(review): the function's tail (frees/return and closing brace) is
// elided in this extraction — do not assume the visible call is the end.
2425 inline void compact() {
2426 if(COMPACTPHASE != gcphase) {
2427 BAMBOO_EXIT(0xb102);
2430 // initialize pointers for compacting
2431 struct moveHelper * orig =
2432 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2433 struct moveHelper * to =
2434 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2436 if(!initOrig_Dst(orig, to)) {
2437 // no available data to compact
2438 // send compact finish msg to STARTUP core
2440 BAMBOO_DEBUGPRINT(0xe001);
2441 BAMBOO_DEBUGPRINT_REG(to->base);
2443 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2444 0, to->base, 0, false);
2449 #ifdef GC_CACHE_ADAPT
2450 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2451 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2452 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2453 gc_cache_revise_infomation.orig_page_index =
2454 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2455 #endif // GC_CACHE_ADAPT
2457 int filledblocks = 0;
2458 INTPTR heaptopptr = 0;
2459 bool localcompact = true;
2460 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2466 // if return NULL, means
2467 // 1. objptr is NULL
2468 // 2. objptr is not a shared obj
2469 // in these cases, remain the original value is OK
// flushObj(): translate an object pointer to its post-compaction address.
// Lookup order (visible in this chunk):
//   1) the core-local pointer table (gcpointertbl);
//   2) on miss, the owning host core's shared mapping table
//      (gcrpointertbls[hostcore]) — a hit is cached back locally;
//   3) on a second miss, a GCMAPREQUEST message round-trip to the host
//      core, after which the local table is consulted again.
// A shared object whose mapping should exist on THIS core but doesn't is
// a fatal inconsistency (BAMBOO_EXIT(0xb103)).
// NOTE(review): the embedded line numbers are non-contiguous — the early
// return for objptr == NULL, the #else arms of the LOCALHASHTBL_TEST
// conditionals, and the wait-for-reply loop after GCMAPREQUEST are not
// visible in this extraction.
2470 inline void * flushObj(void * objptr) {
2472 BAMBOO_DEBUGPRINT(0xe401);
2474 if(objptr == NULL) {
2477 void * dstptr = NULL;
2478 if(ISSHAREDOBJ(objptr)) {
2480 BAMBOO_DEBUGPRINT(0xe402);
2481 BAMBOO_DEBUGPRINT_REG(objptr);
2483 // a shared obj ptr, change to new address
// table accesses must happen in runtime (interrupt-protected) mode
2484 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2486 //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
2488 #ifdef LOCALHASHTBL_TEST
2489 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2491 dstptr = mgchashSearch(gcpointertbl, objptr);
2493 //MGCHashget(gcpointertbl, objptr, &dstptr);
2495 //flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
2497 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2499 BAMBOO_DEBUGPRINT_REG(dstptr);
// local table missed: fall through to the remote/shared lookups
2502 if(NULL == dstptr) {
2505 BAMBOO_DEBUGPRINT(0xe403);
2506 BAMBOO_DEBUGPRINT_REG(objptr);
2507 BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2509 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2510 // error! the obj is right on this core, but cannot find it
2511 //BAMBOO_DEBUGPRINT(0xecec);
2512 BAMBOO_DEBUGPRINT_REG(objptr);
2513 BAMBOO_EXIT(0xb103);
2514 // assume that the obj has not been moved, use the original address
2517 int hostc = hostcore(objptr);
2519 //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
2521 // check the corresponsing sharedptbl
2522 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2523 //struct GCSharedHash * sptbl = gcrpointertbls[hostcore(objptr)];
2524 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2526 //GCSharedHashget(sptbl, (int)objptr, &dstptr);
2527 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2528 if(dstptr != NULL) {
// found in the host's shared table: cache the mapping locally
2529 #ifdef LOCALHASHTBL_TEST
2530 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2532 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2536 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2538 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
2541 if(dstptr == NULL) {
2542 // still can not get the mapping info,
2543 // send msg to host core for the mapping info
2544 gcobj2map = (int)objptr;
2547 // the first time require the mapping, send msg to the hostcore
2548 // for the mapping info
2549 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2550 BAMBOO_NUM_OF_CORE, false);
2557 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
// re-check the local table — presumably the reply handler has
// inserted the mapping by now (wait loop not visible here; verify)
2559 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2560 #ifdef LOCALHASHTBL_TEST
2561 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2563 dstptr = mgchashSearch(gcpointertbl, objptr);
2565 //MGCHashget(gcpointertbl, objptr, &dstptr);
2566 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2567 } // if(dstptr == NULL)
2568 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2570 BAMBOO_DEBUGPRINT_REG(dstptr);
2572 } // if(NULL == dstptr)
2573 } // if(ISSHAREDOBJ(objptr))
2574 // if not a shared obj, return NULL to indicate no need to flush
2576 BAMBOO_DEBUGPRINT(0xe404);
2579 } // void flushObj(void * objptr)
// flushRuntimeObj(): rewrite every runtime-held root pointer to its
// post-compaction address, via flushObj(). Roots visited (in order):
// the mutator stack (garbagelist chain), the per-class parameter-wrapper
// object queues, the current task descriptor, the active task list, the
// incoming transferred-object queue, the outgoing transfer queue, and
// the runtime lock table.
// NOTE(review): line numbers are non-contiguous — the declarations of
// loop indices i/j and the NULL-checks guarding each "dst" store are not
// visible in this extraction; the visible assignments presumably execute
// only when flushObj returned a non-NULL new address — confirm upstream.
2581 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2583 // flush current stack
2584 while(stackptr!=NULL) {
2585 for(i=0; i<stackptr->size; i++) {
2586 if(stackptr->array[i] != NULL) {
2587 void * dst = flushObj(stackptr->array[i]);
2589 stackptr->array[i] = dst;
2593 stackptr=stackptr->next;
// flush queued objects of every class on this core
2597 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2598 for(i=0; i<NUMCLASSES; i++) {
2599 struct parameterwrapper ** queues =
2600 objectqueues[BAMBOO_NUM_OF_CORE][i];
2601 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2602 for(j = 0; j < length; ++j) {
2603 struct parameterwrapper * parameter = queues[j];
2604 struct ObjectHash * set=parameter->objectset;
2605 struct ObjectNode * ptr=set->listhead;
2607 void * dst = flushObj((void *)ptr->key);
// keys may have moved, so the hash set must be rebuilt
2613 ObjectHashrehash(set);
2618 // flush current task descriptor
2619 if(currtpd != NULL) {
2620 for(i=0; i<currtpd->numParameters; i++) {
2621 void * dst = flushObj(currtpd->parameterArray[i]);
2623 currtpd->parameterArray[i] = dst;
2628 // flush active tasks
2629 if(activetasks != NULL) {
2630 struct genpointerlist * ptr=activetasks->list;
2632 struct taskparamdescriptor *tpd=ptr->src;
2634 for(i=0; i<tpd->numParameters; i++) {
2635 void * dst = flushObj(tpd->parameterArray[i]);
2637 tpd->parameterArray[i] = dst;
2642 genrehash(activetasks);
2645 // flush cached transferred obj
2646 struct QueueItem * tmpobjptr = getHead(&objqueue);
2647 while(tmpobjptr != NULL) {
2648 struct transObjInfo * objInfo =
2649 (struct transObjInfo *)(tmpobjptr->objectptr);
2650 void * dst = flushObj(objInfo->objptr);
2652 objInfo->objptr = dst;
2654 tmpobjptr = getNextQueueItem(tmpobjptr);
2657 // flush cached objs to be transferred
2658 struct QueueItem * item = getHead(totransobjqueue);
2659 while(item != NULL) {
2660 struct transObjInfo * totransobj =
2661 (struct transObjInfo *)(item->objectptr);
2662 void * dst = flushObj(totransobj->objptr);
2664 totransobj->objptr = dst;
2666 item = getNextQueueItem(item);
2667 } // while(item != NULL)
2669 // enqueue lock related info
2670 for(i = 0; i < runtime_locklen; ++i) {
2671 void * dst = flushObj(runtime_locks[i].redirectlock);
// lock fields store addresses as ints, hence the casts
2673 runtime_locks[i].redirectlock = (int)dst;
2675 if(runtime_locks[i].value != NULL) {
2676 void * dst=flushObj(runtime_locks[i].value);
2678 runtime_locks[i].value = (int)dst;
2683 } // void flushRuntimeObj(struct garbagelist * stackptr)
// transmappinginfo(): broadcast this core's shared pointer-mapping table
// (gcsharedptbl) to every other active core, then report completion of
// the mapping phase to the startup core (unless this IS the startup
// core, which collects the reports instead of sending one).
2685 inline void transmappinginfo() {
2686 // broadcast the sharedptbl pointer
2687 for(int i = 0; i < NUMCORESACTIVE; i++) {
2688 if(i != BAMBOO_NUM_OF_CORE) {
2689 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2693 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2694 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
// flush(): the core-local flush phase. After compaction, every live
// object may contain stale pointers; this walks (1) the runtime roots
// via flushRuntimeObj(), (2) each marked object dequeued from the GC
// work queue, and (3) each large object from the lobj queue, rewriting
// every interior pointer through flushObj(). Object header layout used
// here: word [0] is the type index, word [6] is the GC status word
// (COMPACTED -> reset to INIT once flushed). Per-type pointer layout
// comes from pointerarray[type]: 0 = primitive array (nothing to do),
// 1 = array of pointers, otherwise a table whose [0] is the number of
// offsets and [1..size] are byte offsets of pointer fields.
// Finishes by reporting GCFINISHFLUSH to the startup core (or clearing
// its own status slot if this IS the startup core).
// NOTE(review): non-contiguous line numbers — the while-loop header over
// gc_dequeue, several closing braces, the "tptr" declaration referenced
// by the debug prints, and the loop-index declarations are not visible
// in this extraction.
2698 inline void flush(struct garbagelist * stackptr) {
2700 flushRuntimeObj(stackptr);
2703 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2704 bool hasItems = gc_moreItems_I();
2705 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2711 BAMBOO_DEBUGPRINT(0xe301);
// queue access must happen in runtime (interrupt-protected) mode
2713 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2714 void * ptr = gc_dequeue_I();
2715 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2716 if(ISSHAREDOBJ(ptr)) {
2717 // should be a local shared obj and should have mapping info
2718 ptr = flushObj(ptr);
2720 BAMBOO_DEBUGPRINT(0xe302);
2721 BAMBOO_DEBUGPRINT_REG(ptr);
2722 BAMBOO_DEBUGPRINT_REG(tptr);
2723 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
// shared obj without mapping info at flush time: fatal
2726 BAMBOO_EXIT(0xb105);
2728 } // if(ISSHAREDOBJ(ptr))
2729 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2730 int type = ((int *)(ptr))[0];
2731 // scan all pointers in ptr
2732 unsigned INTPTR * pointer;
2733 pointer=pointerarray[type];
2735 BAMBOO_DEBUGPRINT(0xe303);
2736 BAMBOO_DEBUGPRINT_REG(pointer);
2739 /* Array of primitives */
2741 } else if (((INTPTR)pointer)==1) {
2743 BAMBOO_DEBUGPRINT(0xe304);
2745 /* Array of pointers */
2746 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2747 int length=ao->___length___;
2749 for(j=0; j<length; j++) {
2751 BAMBOO_DEBUGPRINT(0xe305);
// element storage begins sizeof(int) past the length field
2754 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2756 BAMBOO_DEBUGPRINT_REG(objptr);
2758 if(objptr != NULL) {
2759 void * dst = flushObj(objptr);
2761 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2767 BAMBOO_DEBUGPRINT(0xe306);
// normal object: pointer[0] = field count, pointer[1..size] = offsets
2769 INTPTR size=pointer[0];
2771 for(i=1; i<=size; i++) {
2773 BAMBOO_DEBUGPRINT(0xe307);
2775 unsigned int offset=pointer[i];
2776 void * objptr=*((void **)(((char *)ptr)+offset));
2778 BAMBOO_DEBUGPRINT_REG(objptr);
2780 if(objptr != NULL) {
2781 void * dst = flushObj(objptr);
2783 *((void **)(((char *)ptr)+offset)) = dst;
2786 } // for(i=1; i<=size; i++)
2787 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2788 // restore the mark field, indicating that this obj has been flushed
2789 if(ISSHAREDOBJ(ptr)) {
2790 ((int *)(ptr))[6] = INIT;
2792 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2793 } // while(gc_moreItems())
2795 BAMBOO_DEBUGPRINT(0xe308);
2798 // TODO bug here: the startup core contains all lobjs' info, thus all the
2799 // lobjs are flushed in sequence.
2801 while(gc_lobjmoreItems_I()) {
2803 BAMBOO_DEBUGPRINT(0xe309);
2805 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2806 ptr = flushObj(ptr);
2808 BAMBOO_DEBUGPRINT(0xe30a);
2809 BAMBOO_DEBUGPRINT_REG(ptr);
2810 BAMBOO_DEBUGPRINT_REG(tptr);
2811 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
// large obj without mapping info: fatal (distinct code from 0xb105)
2814 BAMBOO_EXIT(0xb106);
2816 if(((int *)(ptr))[6] == COMPACTED) {
2817 int type = ((int *)(ptr))[0];
2818 // scan all pointers in ptr
2819 unsigned INTPTR * pointer;
2820 pointer=pointerarray[type];
2822 BAMBOO_DEBUGPRINT(0xe30b);
2823 BAMBOO_DEBUGPRINT_REG(pointer);
2826 /* Array of primitives */
2828 } else if (((INTPTR)pointer)==1) {
2830 BAMBOO_DEBUGPRINT(0xe30c);
2832 /* Array of pointers */
2833 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2834 int length=ao->___length___;
2836 for(j=0; j<length; j++) {
2838 BAMBOO_DEBUGPRINT(0xe30d);
2841 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2843 BAMBOO_DEBUGPRINT_REG(objptr);
2845 if(objptr != NULL) {
2846 void * dst = flushObj(objptr);
2848 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2854 BAMBOO_DEBUGPRINT(0xe30e);
2856 INTPTR size=pointer[0];
2858 for(i=1; i<=size; i++) {
2860 BAMBOO_DEBUGPRINT(0xe30f);
2862 unsigned int offset=pointer[i];
2863 void * objptr=*((void **)(((char *)ptr)+offset));
2866 BAMBOO_DEBUGPRINT_REG(objptr);
2868 if(objptr != NULL) {
2869 void * dst = flushObj(objptr);
2871 *((void **)(((char *)ptr)+offset)) = dst;
2874 } // for(i=1; i<=size; i++)
2875 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2876 // restore the mark field, indicating that this obj has been flushed
2877 ((int *)(ptr))[6] = INIT;
2878 } // if(((int *)(ptr))[6] == COMPACTED)
2879 } // while(gc_lobjmoreItems())
2881 BAMBOO_DEBUGPRINT(0xe310);
2884 // send flush finish message to core coordinator
2885 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2886 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2888 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2891 BAMBOO_DEBUGPRINT(0xe311);
2895 #ifdef GC_CACHE_ADAPT
2896 // prepare for cache adaption:
2897 // -- flush the shared heap
2898 // -- clean dtlb entries
2899 // -- change cache strategy
// cacheAdapt_gc(): switch the core between GC-time and mutator-time
// caching. Flushes the L2 so no stale data survives the policy change,
// invalidates the DTLB so new page policies take effect, then records
// the new mode in the global gccachestage flag.
// @param isgccachestage  true when entering GC, false when returning
//                        control to the mutator.
2900 void cacheAdapt_gc(bool isgccachestage) {
2901 // flush the shared heap
2902 BAMBOO_CACHE_FLUSH_L2();
2904 // clean the dtlb entries
2905 BAMBOO_CLEAN_DTLB();
2907 // change the cache strategy
2908 gccachestage = isgccachestage;
2909 } // cacheAdapt_gc(bool isgccachestage)
2911 // the master core decides how to adapt cache strategy for the mutator
2912 // according to collected statistic data
2914 // make all pages hfh
// cacheAdapt_policy_h4h(): cache-adaptation policy #1 — mark every
// shared-heap page hash-for-home (BAMBOO_CACHE_MODE_HASH). Writes
// (page_index, policy word) pairs into gccachepolicytbl starting at
// slot 1 (slot 0 holds the count, filled in by the caller).
// NOTE(review): non-contiguous line numbers — the declarations of
// page_sva and the tmp_p increments between the two stores, plus the
// return of the change count, are not visible in this extraction.
2915 int cacheAdapt_policy_h4h(){
2916 unsigned int page_index = 0;
2918 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2920 int * tmp_p = gccachepolicytbl+1;
2921 for(page_index = 0; page_index < page_num; page_index++) {
2922 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2923 bamboo_cache_policy_t policy = {0};
2924 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
// record pair: page index, then the encoded policy word
2925 *tmp_p = page_index;
2927 *tmp_p = policy.word;
2933 } // int cacheAdapt_policy_hfh()
2935 // make all pages local as non-cache-adaptable gc local mode
// cacheAdapt_policy_local(): cache-adaptation policy #2 — home every
// page on the core that owns its block (gc_block2core), i.e. plain
// local caching with no sampling input. Emits (page_index, policy word)
// pairs into gccachepolicytbl like the other policies.
// NOTE(review): non-contiguous line numbers — the declaration of
// "block", the tmp_p increments, and the returned change count are not
// visible in this extraction.
2936 int cacheAdapt_policy_local(){
2937 unsigned int page_index = 0;
2939 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2941 int * tmp_p = gccachepolicytbl+1;
2942 for(page_index = 0; page_index < page_num; page_index++) {
2943 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2944 bamboo_cache_policy_t policy = {0};
2946 BLOCKINDEX(page_sva, &block);
2947 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2948 // locally cache the page in the hotest core
2949 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2950 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
// hardware tile coordinates are 1-based here, hence the +1
2951 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2952 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2953 *tmp_p = page_index;
2955 *tmp_p = policy.word;
2961 } // int cacheAdapt_policy_local()
// cacheAdapt_policy_hotest(): cache-adaptation policy #3 — for each
// page, scan the per-core access-frequency samples (gccachesamplingtbl_r,
// strided by size_cachesamplingtbl_local_r per core) and home the page
// on the core with the highest count. Untouched pages keep their old
// policy. Emits (page_index, policy word) pairs into gccachepolicytbl.
// NOTE(review): non-contiguous line numbers — the declarations of
// hotfreq/hotestcore, the "page untouched" skip branch, the tmp_p
// increments, and the returned change count are not visible here.
2963 int cacheAdapt_policy_hotest(){
2964 unsigned int page_index = 0;
2966 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2968 int * tmp_p = gccachepolicytbl+1;
2969 for(page_index = 0; page_index < page_num; page_index++) {
2970 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2971 bamboo_cache_policy_t policy = {0};
// walk one column of the sampling table: same page, every core
2975 int *local_tbl=&gccachesamplingtbl_r[page_index];
2976 for(int i = 0; i < NUMCORESACTIVE; i++) {
2977 int freq = *local_tbl;
2978 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
2980 // check the freqency, decide if this page is hot for the core
2981 if(hotfreq < freq) {
2987 // Decide the cache strategy for this page
2988 // If decide to adapt a new cache strategy, write into the shared block of
2989 // the gcsharedsamplingtbl. The mem recording information that has been
2990 // written is enough to hold the information.
2991 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
2993 // this page has not been accessed, do not change its cache policy
2996 // locally cache the page in the hotest core
2997 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2998 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2999 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3000 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3001 *tmp_p = page_index;
3003 *tmp_p = policy.word;
3010 } // int cacheAdapt_policy_hotest()
3012 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
3013 // cache the page on the core that accesses it the most if that core accesses
3014 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
// cacheAdapt_policy_dominate(): cache-adaptation policy #4 — home a page
// on its hottest core only if that core accounts for more than
// GC_CACHE_ADAPT_DOMINATE_THRESHOLD percent of the page's total
// accesses; otherwise fall back to hash-for-home. Both frequencies are
// normalized by BAMBOO_PAGE_SIZE before the comparison.
// NOTE(review): non-contiguous line numbers — hotfreq/hotestcore/
// totalfreq declarations, the totalfreq accumulation inside the scan
// loop, the untouched-page skip, the tmp_p increments, and the returned
// change count are not visible in this extraction.
3016 int cacheAdapt_policy_dominate(){
3017 unsigned int page_index = 0;
3019 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3021 int * tmp_p = gccachepolicytbl+1;
3022 for(page_index = 0; page_index < page_num; page_index++) {
3023 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3024 bamboo_cache_policy_t policy = {0};
3029 int *local_tbl=&gccachesamplingtbl_r[page_index];
3030 for(int i = 0; i < NUMCORESACTIVE; i++) {
3031 int freq = *local_tbl;
3032 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3035 // check the freqency, decide if this page is hot for the core
3036 if(hotfreq < freq) {
3042 // Decide the cache strategy for this page
3043 // If decide to adapt a new cache strategy, write into the shared block of
3045 // Format: page start va + cache policy
3047 // this page has not been accessed, do not change its cache policy
// dominance test: scale total by the threshold percentage first
3050 totalfreq = (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
3051 hotfreq/=BAMBOO_PAGE_SIZE;
3052 if(hotfreq < totalfreq) {
// no single core dominates: hash the page across cores
3054 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3056 // locally cache the page in the hotest core
3057 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3058 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3059 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3060 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3062 *tmp_p = page_index;
3064 *tmp_p = policy.word;
3070 } // int cacheAdapt_policy_dominate()
3072 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 20000
// gc_quicksort(): recursive quicksort over an array of 3-int records,
// ordering records descending by one field selected via "offset"
// (record fields are addressed as array[idx*3 - offset]). Used to rank
// core2heavypages entries by remote-access count or total frequency.
// NOTE(review): the remaining parameter declarations (left, right,
// offset) and the leftIdx/rightIdx initializations fall on lines not
// visible in this extraction, as do the leftIdx++/rightIdx-- advances
// inside the two inner scans — the analysis below assumes the standard
// shape; confirm against the full source.
3074 void gc_quicksort(int *array,
3080 int rightIdx = right;
3081 if((right-left+1) >= 1) {
3082 pivot = (left+right)/2;
3083 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
3084 int pivotValue = array[pivot*3-offset];
// descending order: skip elements already >= pivot on the left
3085 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
3088 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
3091 // swap [leftIdx] & [rightIdx]
// swap the whole 3-int record, not just the key field
3092 for(int k = 0; k < 3; k++) {
3093 int tmp = array[3*rightIdx-k];
3094 array[3*rightIdx-k] = array[3*leftIdx-k];
3095 array[3*leftIdx-k] = tmp;
// keep the pivot's index current when it was one of the swapped slots
3099 if((leftIdx-1) == pivot) {
3100 pivot = rightIdx = rightIdx + 1;
3101 } else if((leftIdx+1) == pivot) {
3102 pivot = leftIdx = leftIdx-1;
3105 gc_quicksort(array, left, pivot-1, offset);
3106 gc_quicksort(array, pivot+1, right, offset);
3109 } // void gc_quicksort(...)
3111 // Every page cached on the core that accesses it the most.
3112 // Check to see if any core's pages total more accesses than threshold
3113 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3114 // most remote accesses and hash for home them until we get below
3115 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
// cacheAdapt_policy_overload(): cache-adaptation policy #5 — first home
// every page on its hottest core (like policy_hotest), while recording
// per-core workload. Then, for any core whose accumulated workload
// exceeds a threshold (total_workload/10 as written; the
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD constant is commented out), demote
// that core's pages with the most remote accesses to hash-for-home
// until the core drops below the threshold.
// core2heavypages[c] layout: [0] = entry count; then per entry a triple
// ([3k+1] = pointer to the policy slot in gccachepolicytbl,
//  [3k+2] = total page frequency, [3k+3] = remote-access count).
// NOTE(review): non-contiguous line numbers — hotfreq/hotestcore/
// totalfreq declarations and accumulation, the untouched-page skip,
// tmp_p increments, the j loop variable/advance in the demotion loop,
// and the returned change count are not visible in this extraction.
3116 int cacheAdapt_policy_overload(){
3117 unsigned int page_index = 0;
3119 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3121 int * tmp_p = gccachepolicytbl+1;
3122 unsigned long long workload[NUMCORESACTIVE];
3123 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3124 unsigned long long total_workload = 0;
// VLA sized by page count — presumably small enough for the stack here
3125 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3126 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3127 for(page_index = 0; page_index < page_num; page_index++) {
3128 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3129 bamboo_cache_policy_t policy = {0};
3134 int *local_tbl=&gccachesamplingtbl_r[page_index];
3135 for(int i = 0; i < NUMCORESACTIVE; i++) {
3136 int freq = *local_tbl;
3137 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3140 // check the freqency, decide if this page is hot for the core
3141 if(hotfreq < freq) {
3146 /*if(page_sva == 0x10e90000) {
3147 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3151 // Decide the cache strategy for this page
3152 // If decide to adapt a new cache strategy, write into the shared block of
3153 // the gcsharedsamplingtbl. The mem recording information that has been
3154 // written is enough to hold the information.
3155 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3157 // this page has not been accessed, do not change its cache policy
3161 totalfreq/=BAMBOO_PAGE_SIZE;
3162 hotfreq/=BAMBOO_PAGE_SIZE;
3163 // locally cache the page in the hotest core
3164 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3165 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3166 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3167 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3168 *tmp_p = page_index;
3170 *tmp_p = policy.word;
3173 workload[hotestcore] += totalfreq;
3174 total_workload += totalfreq;
3175 // insert into core2heavypages using quicksort
3176 int remoteaccess = totalfreq - hotfreq;
3177 int index = core2heavypages[hotestcore][0];
3178 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3179 core2heavypages[hotestcore][3*index+2] = totalfreq;
// stores the address of this page's policy word for later rewrite
3180 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3181 core2heavypages[hotestcore][0]++;
3183 /*if(page_sva == 0x10f10000) {
3185 BLOCKINDEX(page_sva, &block);
3186 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3187 int coord_x = bamboo_cpu2coords[2*coren]+1;
3188 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3189 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
// overload cut-off: 10% of the total observed workload
3193 int workload_threshold = total_workload / 10;
3194 // Check the workload of each core
3195 for(int i = 0; i < NUMCORESACTIVE; i++) {
3197 int index = core2heavypages[i][0];
3198 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3199 // sort according to the remoteaccess
3200 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3201 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3202 // hfh those pages with more remote accesses
3203 bamboo_cache_policy_t policy = {0};
3204 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
// rewrite the policy word in-place through the saved slot pointer
3205 *((int*)core2heavypages[i][j]) = policy.word;
3206 workload[i] -= core2heavypages[i][j+1];
3213 } // int cacheAdapt_policy_overload()
3215 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3216 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
3217 // Every page cached on the core that accesses it the most.
3218 // Check to see if any core's pages total more accesses than threshold
3219 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3220 // most remote accesses and hash for home them until we get below
3221 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3222 // Sort pages based on activity....
3223 // If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
3224 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3225 // then start hfh these pages(selecting the ones with the most remote
3226 // accesses first or fewest local accesses) until we get below
3227 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
// cacheAdapt_policy_crowd(): cache-adaptation policy #6 — extends
// policy #5 (overload). First homes every page on its hottest core and
// applies the same overload demotion. Then, per core, checks whether
// GC_CACHE_ADAPT_ACCESS_THRESHOLD percent of the remaining workload is
// concentrated ("crowded") on more than GC_CACHE_ADAPT_CROWD_THRESHOLD
// pages; if so, re-sorts by remote access and demotes pages to
// hash-for-home until below the crowd threshold.
// core2heavypages[c] layout (same as policy_overload): [0] = count;
// per entry [3k+1] = policy-slot pointer, [3k+2] = total freq,
// [3k+3] = remote-access count.
// NOTE(review): non-contiguous line numbers — several declarations
// (hotfreq, hotestcore, totalfreq, j, t_workload), loop advances, and
// the tail of the crowd loop (partly visible only as commented-out
// code around 3347-3359) are missing from this extraction; the crowd
// logic below should be read against the full source.
3228 int cacheAdapt_policy_crowd(){
3229 unsigned int page_index = 0;
3231 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3233 int * tmp_p = gccachepolicytbl+1;
3234 unsigned long long workload[NUMCORESACTIVE];
3235 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3236 unsigned long long total_workload = 0;
3237 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3238 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3239 for(page_index = 0; page_index < page_num; page_index++) {
3240 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3241 bamboo_cache_policy_t policy = {0};
3246 int *local_tbl=&gccachesamplingtbl_r[page_index];
3247 for(int i = 0; i < NUMCORESACTIVE; i++) {
3248 int freq = *local_tbl;
3249 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3252 // check the freqency, decide if this page is hot for the core
3253 if(hotfreq < freq) {
3258 /*if(page_sva == 0x10e90000) {
3259 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3263 // Decide the cache strategy for this page
3264 // If decide to adapt a new cache strategy, write into the shared block of
3265 // the gcsharedsamplingtbl. The mem recording information that has been
3266 // written is enough to hold the information.
3267 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3269 // this page has not been accessed, do not change its cache policy
3272 totalfreq/=BAMBOO_PAGE_SIZE;
3273 hotfreq/=BAMBOO_PAGE_SIZE;
3274 // locally cache the page in the hotest core
3275 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3276 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3277 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3278 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3279 *tmp_p = page_index;
3281 *tmp_p = policy.word;
3284 workload[hotestcore] += totalfreq;
3285 total_workload += totalfreq;
3286 // insert into core2heavypages using quicksort
3287 int remoteaccess = totalfreq - hotfreq;
3288 int index = core2heavypages[hotestcore][0];
3289 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3290 core2heavypages[hotestcore][3*index+2] = totalfreq;
// address of the page's policy word, for in-place rewrite later
3291 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3292 core2heavypages[hotestcore][0]++;
3294 /*if(page_sva == 0x10f10000) {
3296 BLOCKINDEX(page_sva, &block);
3297 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3298 int coord_x = bamboo_cpu2coords[2*coren]+1;
3299 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3300 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
3304 int workload_threshold = total_workload / 10;
3305 // Check the workload of each core
3306 for(int i = 0; i < NUMCORESACTIVE; i++) {
3308 int index = core2heavypages[i][0];
3309 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3310 // sort according to the remoteaccess
3311 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3312 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3313 // hfh those pages with more remote accesses
3314 bamboo_cache_policy_t policy = {0};
3315 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3316 *((int*)core2heavypages[i][j]) = policy.word;
3317 workload[i] -= core2heavypages[i][j+1];
3322 // Check if the accesses are crowded on few pages
3323 // sort according to the total access
// re-sort the surviving entries (from j/3+1) by total frequency
3325 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3326 int threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3327 int num_crowded = 0;
// count how many of the hottest pages it takes to reach the threshold
3330 t_workload += core2heavypages[i][j+num_crowded*3+1];
3332 } while(t_workload < threshold);
3333 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
3334 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
3335 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
3337 // need to hfh these pages
3338 // sort the pages according to remote access
3339 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3340 //while((num_crowded--) && (j < index*3)) {
3341 // h4h those pages with more remote accesses
3342 bamboo_cache_policy_t policy = {0};
3343 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3344 *((int*)core2heavypages[i][j]) = policy.word;
3345 workload[i] -= core2heavypages[i][j+1];
3346 t_workload -= core2heavypages[i][j+1];
3347 /*if((j/3+GC_CACHE_ADAPT_CROWD_THRESHOLD) < index) {
3349 core2heavypages[i][j+GC_CACHE_ADAPT_CROWD_THRESHOLD*3+1];
3352 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3353 /*if(t_workload <= threshold) {
3357 if((j < index*3) && (t_workload > threshold)) {
3358 num_crowded = ((index-j/3) > GC_CACHE_ADAPT_CROWD_THRESHOLD) ?
3359 (GC_CACHE_ADAPT_CROWD_THRESHOLD) : (index-j/3);*/
3366 } // int cacheAdapt_policy_overload()
// cacheAdapt_master(): run on the master core after sampling — selects
// exactly one cacheAdapt_policy_* routine at compile time via the
// GC_CACHE_ADAPT_POLICY1..6 macros, then stores the number of changed
// pages into gccachepolicytbl[0] so worker cores (cacheAdapt_mutator)
// know how many (page_index, policy) pairs follow.
// NOTE(review): the declaration of numchanged, the fallback branch when
// no policy macro is defined, and the function's closing lines fall on
// lines not visible in this extraction.
3368 void cacheAdapt_master() {
3369 #ifdef GC_CACHE_ADAPT
3370 //gc_output_cache_sampling_r();
3371 #endif // GC_CACHE_ADAPT
3373 // check the statistic data
3374 // for each page, decide the new cache strategy
3375 #ifdef GC_CACHE_ADAPT_POLICY1
3376 numchanged = cacheAdapt_policy_h4h();
3377 #elif defined GC_CACHE_ADAPT_POLICY2
3378 numchanged = cacheAdapt_policy_local();
3379 #elif defined GC_CACHE_ADAPT_POLICY3
3380 numchanged = cacheAdapt_policy_hotest();
3381 #elif defined GC_CACHE_ADAPT_POLICY4
3382 numchanged = cacheAdapt_policy_dominate();
3383 #elif defined GC_CACHE_ADAPT_POLICY5
3384 numchanged = cacheAdapt_policy_overload();
3385 #elif defined GC_CACHE_ADAPT_POLICY6
3386 numchanged = cacheAdapt_policy_crowd();
// publish the count in slot 0; the pairs start at gccachepolicytbl+1
3388 *gccachepolicytbl = numchanged;
3390 //if(numchanged > 0) tprintf("=================\n");
3393 // adapt the cache strategy for the mutator
3394 void cacheAdapt_mutator() {
3395 int numchanged = *gccachepolicytbl;
3396 // check the changes and adapt them
3397 int * tmp_p = gccachepolicytbl+1;
3398 while(numchanged--) {
3399 // read out the policy
3400 int page_index = *tmp_p;
3401 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3403 /*if(BAMBOO_NUM_OF_CORE == 0) {
3404 tprintf("va: %x, policy: %d (%d,%d) \n",
3405 (int)(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva), policy.cache_mode,
3406 policy.lotar_x, policy.lotar_y);
3409 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3410 policy, BAMBOO_PAGE_SIZE);
3414 //if(BAMBOO_NUM_OF_CORE == 0) tprintf("=================\n"); // TODO
// gc_output_cache_sampling(): debug dump of the pre-GC cache sampling
// table (gccachesamplingtbl). For each shared-heap page, prints its VA,
// index, and owning core, then one page-size-normalized access count
// per active core.
// NOTE(review): the declaration of "block" and the newline/closing
// lines of the per-page loop are not visible in this extraction.
3417 void gc_output_cache_sampling() {
3418 unsigned int page_index = 0;
3420 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3421 for(page_index = 0; page_index < page_num; page_index++) {
3422 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3424 BLOCKINDEX(page_sva, &block);
3425 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3426 tprintf("va: %x page_index: %d host: %d\n",
3427 (int)page_sva, page_index, coren);
3428 for(int i = 0; i < NUMCORESACTIVE; i++) {
// each core's sampling rows are size_cachesamplingtbl_local apart
3429 int * local_tbl = (int *)((void *)gccachesamplingtbl
3430 +size_cachesamplingtbl_local*i);
3431 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3432 printf("%8d ",freq);
3436 printf("=================\n");
3437 } // gc_output_cache_sampling
// gc_output_cache_sampling_r(): same debug dump as
// gc_output_cache_sampling(), but over the "_r" table
// (gccachesamplingtbl_r / size_cachesamplingtbl_local_r) — the samples
// the cacheAdapt_policy_* routines actually consume.
// NOTE(review): the declaration of "block" and the per-page loop's
// closing lines are not visible in this extraction.
3439 void gc_output_cache_sampling_r() {
3440 unsigned int page_index = 0;
3442 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3443 for(page_index = 0; page_index < page_num; page_index++) {
3444 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3446 BLOCKINDEX(page_sva, &block);
3447 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3448 tprintf("va: %x page_index: %d host: %d\n",
3449 (int)page_sva, page_index, coren);
3450 for(int i = 0; i < NUMCORESACTIVE; i++) {
3451 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3452 +size_cachesamplingtbl_local_r*i);
3453 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3454 printf("%8d ",freq);
3458 printf("=================\n");
3459 } // gc_output_cache_sampling
3460 #endif // GC_CACHE_ADAPT
// gc_collect(): GC worker routine for a collector core. Announces
// readiness (GCFINISHPRE), then steps through the global gcphase state
// machine: INIT -> MARK -> compact -> MAP -> FLUSH -> (cache-adapt
// PREFINISH) -> FINISH, sending a finish message to STARTUPCORE at the
// end of each phase it participates in.
// NOTE(review): non-contiguous line numbers — the busy-wait loops
// between phases, the initGC()/transmappinginfo()/flush() calls implied
// by the surrounding prints, and most closing braces fall on lines not
// visible in this extraction; only the visible messaging and prints are
// documented as fact.
3462 inline void gc_collect(struct garbagelist * stackptr) {
3463 // inform the master that this core is at a gc safe point and is ready to
3465 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3466 self_numreceiveobjs, false);
3468 // core collector routine
3470 if(INITPHASE == gcphase) {
3474 #ifdef RAWPATH // TODO GC_DEBUG
3475 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3478 #ifdef GC_CACHE_ADAPT
3479 // prepare for cache adaption:
3480 cacheAdapt_gc(true);
3481 #endif // GC_CACHE_ADAPT
3482 //send init finish msg to core coordinator
3483 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3486 if(MARKPHASE == gcphase) {
3490 #ifdef RAWPATH // TODO GC_DEBUG
3491 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3492 udn_tile_coord_y());
// collector cores mark from their own stack roots
3494 mark(true, stackptr);
3495 #ifdef RAWPATH // TODO GC_DEBUG
3496 printf("(%x,%x) Finish mark phase, start compact phase\n",
3497 udn_tile_coord_x(), udn_tile_coord_y());
3500 #ifdef RAWPATH // TODO GC_DEBUG
3501 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3502 udn_tile_coord_y());
3506 if(MAPPHASE == gcphase) {
3510 #ifdef RAWPATH // TODO GC_DEBUG
3511 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3512 udn_tile_coord_y());
3515 #ifdef RAWPATH // TODO GC_DEBUG
3516 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3517 udn_tile_coord_y());
3521 if(FLUSHPHASE == gcphase) {
3525 #ifdef RAWPATH // TODO GC_DEBUG
3526 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3527 udn_tile_coord_y());
3530 // send the num of obj/liveobj/forwardobj to the startupcore
3531 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3532 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3533 gc_num_liveobj, gc_num_forwardobj, false);
3536 #endif // GC_PROFLIE
3538 #ifdef RAWPATH // TODO GC_DEBUG
3539 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3540 udn_tile_coord_y());
3543 #ifdef GC_CACHE_ADAPT
3545 if(PREFINISHPHASE == gcphase) {
3549 #ifdef RAWPATH // TODO GC_DEBUG
3550 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3551 udn_tile_coord_y());
3553 // cache adapt phase
// apply the master's policy table, then leave GC caching mode
3554 cacheAdapt_mutator();
3555 cacheAdapt_gc(false);
3556 //send init finish msg to core coordinator
3557 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3558 #ifdef RAWPATH // TODO GC_DEBUG
3559 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3560 udn_tile_coord_y());
3562 #endif // GC_CACHE_ADAPT
3565 if(FINISHPHASE == gcphase) {
3569 #ifdef RAWPATH // TODO GC_DEBUG
3570 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3572 } // void gc_collect(struct garbagelist * stackptr)
// gc_nocollect(): GC routine for a core that does NOT run the collector
// (non-GC core). Mirrors gc_collect() but skips the compact/map phases:
// it still participates in INIT, MARK, FLUSH, the optional cache-adapt
// PREFINISH, and FINISH, reporting each phase to STARTUPCORE.
// NOTE(review): as with gc_collect(), the inter-phase wait loops and
// several closing braces fall on lines not visible in this extraction.
3574 inline void gc_nocollect(struct garbagelist * stackptr) {
3575 // inform the master that this core is at a gc safe point and is ready to
3577 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3578 self_numreceiveobjs, false);
3581 if(INITPHASE == gcphase) {
3585 #ifdef RAWPATH // TODO GC_DEBUG
3586 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3589 #ifdef GC_CACHE_ADAPT
3590 // prepare for cache adaption:
3591 cacheAdapt_gc(true);
3592 #endif // GC_CACHE_ADAPT
3593 //send init finish msg to core coordinator
3594 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3597 if(MARKPHASE == gcphase) {
3601 #ifdef RAWPATH // TODO GC_DEBUG
3602 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3603 udn_tile_coord_y());
3605 mark(true, stackptr);
3606 #ifdef RAWPATH // TODO GC_DEBUG
// unlike gc_collect(), this core waits for flush — no compact/map here
3607 printf("(%x,%x) Finish mark phase, wait for flush\n",
3608 udn_tile_coord_x(), udn_tile_coord_y());
3611 // non-gc core collector routine
3613 if(FLUSHPHASE == gcphase) {
3617 #ifdef RAWPATH // TODO GC_DEBUG
3618 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3619 udn_tile_coord_y());
3622 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3623 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3624 gc_num_liveobj, gc_num_forwardobj, false);
3627 #endif // GC_PROFLIE
3629 #ifdef RAWPATH // TODO GC_DEBUG
3630 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3631 udn_tile_coord_y());
3634 #ifdef GC_CACHE_ADAPT
3636 if(PREFINISHPHASE == gcphase) {
3640 #ifdef RAWPATH // TODO GC_DEBUG
3641 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3642 udn_tile_coord_y());
3644 // cache adapt phase
3645 cacheAdapt_mutator();
3646 cacheAdapt_gc(false);
3647 //send init finish msg to core coordinator
3648 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3649 #ifdef RAWPATH // TODO GC_DEBUG
3650 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3651 udn_tile_coord_y());
3653 #endif // GC_CACHE_ADAPT
3656 if(FINISHPHASE == gcphase) {
3660 #ifdef RAWPATH // TODO GC_DEBUG
3661 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3663 } // void gc_collect(struct garbagelist * stackptr)
// Coordinator routine run only on STARTUPCORE.  Drives the whole collection
// through its phases in order: INIT -> MARK -> (large-object handling) ->
// COMPACT -> MAP -> FLUSH -> [PREFINISH, cache-adaption builds] -> FINISH.
// For each phase it broadcasts a GCSTART* message, participates in the phase
// itself, then spins on gccorestatus[] (under runtime mode) until every
// relevant core has reported completion.
// NOTE(review): SOURCE is an elided listing — several wait loops, brace
// closers and #else/#endif seams between the visible lines are not shown.
3665 inline void gc_master(struct garbagelist * stackptr) {
3667 gcphase = INITPHASE;
3669 waitconfirm = false;
3673 // Note: all cores need to init gc including non-gc cores
3674 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3675 // send GC init messages to all cores
3676 send_msg_1(i, GCSTARTINIT, false);
3678 bool isfirst = true;
3679 bool allStall = false;
3681 #ifdef GC_CACHE_ADAPT
3682 // prepare for cache adaption:
3683 cacheAdapt_gc(true);
3684 #endif // GC_CACHE_ADAPT
3686 #ifdef RAWPATH // TODO GC_DEBUG
3687 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3688 udn_tile_coord_y());
// wait until every active core has acknowledged init
3691 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3693 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3694 if(gc_checkAllCoreStatus_I()) {
3695 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3698 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3703 #ifdef GC_CACHE_ADAPT
3704 //gc_output_cache_sampling();
3705 #endif // GC_CACHE_ADAPT
3706 #ifdef RAWPATH // TODO GC_DEBUG
3707 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3708 udn_tile_coord_y());
// ---- mark phase ----
3710 // all cores have finished initGC
3711 // restore the gcstatus of all cores
3712 // Note: all cores have to do mark including non-gc cores
3713 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3714 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3715 gccorestatus[i] = 1;
3716 // send GC start messages to all cores
3717 send_msg_1(i, GCSTART, false);
3720 gcphase = MARKPHASE;
3722 while(MARKPHASE == gcphase) {
3723 mark(isfirst, stackptr);
3730 } // while(MARKPHASE == gcphase)
3731 // send msgs to all cores requesting large objs info
3732 // Note: only need to ask gc cores, non-gc cores do not host any objs
3733 numconfirm = NUMCORES4GC - 1;
3734 for(i = 1; i < NUMCORES4GC; ++i) {
3735 send_msg_1(i, GCLOBJREQUEST, false);
3737 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3742 } // wait for responses
3743 // check the heaptop
3744 if(gcheaptop < gcmarkedptrbound) {
3745 gcheaptop = gcmarkedptrbound;
3750 #ifdef RAWPATH // TODO GC_DEBUG
3751 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3752 udn_tile_coord_y());
3755 // cache all large objs
3757 // not enough space to cache large objs: unrecoverable, abort
3758 BAMBOO_EXIT(0xb107);
3760 // predict number of blocks to fill for each core
3762 int numpbc = loadbalance(&tmpheaptop);
3764 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3765 #ifdef RAWPATH // TODO GC_DEBUG
3766 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3767 udn_tile_coord_y());
3770 //int tmptopptr = 0;
3771 //BASEPTR(gctopcore, 0, &tmptopptr);
3773 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3774 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3776 BAMBOO_DEBUGPRINT(0xabab);
3777 BAMBOO_DEBUGPRINT_REG(tmptopptr);
// ---- compact phase: assign each gc core its block quota ----
3779 for(i = 0; i < NUMCORES4GC; ++i) {
3781 BASEPTR(i, numpbc, &tmpcoreptr);
3782 //send start compact messages to all cores
3783 //TODO bug here, do not know if the direction is positive or negative?
3784 if (tmpcoreptr < tmpheaptop /*tmptopptr*/) {
3785 gcstopblock[i] = numpbc + 1;
3786 if(i != STARTUPCORE) {
3787 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3789 gcblock2fill = numpbc+1;
3790 } // if(i != STARTUPCORE)
3792 gcstopblock[i] = numpbc;
3793 if(i != STARTUPCORE) {
3794 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3796 gcblock2fill = numpbc;
3797 } // if(i != STARTUPCORE)
3800 BAMBOO_DEBUGPRINT(0xf000+i);
3801 BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3802 BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3804 // init some data structures for compact phase
3806 gcfilledblocks[i] = 0;
3807 gcrequiredmems[i] = 0;
3817 bool finalcompact = false;
3818 // initialize pointers for compacting
3819 struct moveHelper * orig =
3820 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3821 struct moveHelper * to =
3822 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3823 initOrig_Dst(orig, to);
3824 int filledblocks = 0;
3825 INTPTR heaptopptr = 0;
3826 bool finishcompact = false;
3827 bool iscontinue = true;
3828 bool localcompact = true;
// the master compacts its own share while also arbitrating move requests
// from the other cores until every core reports compaction done
3829 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3830 if((!finishcompact) && iscontinue) {
3832 BAMBOO_DEBUGPRINT(0xe001);
3833 BAMBOO_DEBUGPRINT_REG(numpbc);
3834 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3836 finishcompact = compacthelper(orig, to, &filledblocks,
3837 &heaptopptr, &localcompact);
3839 BAMBOO_DEBUGPRINT(0xe002);
3840 BAMBOO_DEBUGPRINT_REG(finishcompact);
3841 BAMBOO_DEBUGPRINT_REG(gctomove);
3842 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3843 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3844 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3848 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3849 if(gc_checkCoreStatus_I()) {
3850 // all cores have finished compacting
3851 // restore the gcstatus of all cores
3852 for(i = 0; i < NUMCORES4GC; ++i) {
3853 gccorestatus[i] = 1;
3855 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3858 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3859 // check if there is spare mem for pending move requests
3860 if(COMPACTPHASE == gcphase) {
3862 BAMBOO_DEBUGPRINT(0xe003);
3864 resolvePendingMoveRequest();
3866 BAMBOO_DEBUGPRINT_REG(gctomove);
3870 BAMBOO_DEBUGPRINT(0xe004);
3874 } // if(gc_checkCoreStatus_I()) else ...
3878 BAMBOO_DEBUGPRINT(0xe005);
3879 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3880 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3881 BAMBOO_DEBUGPRINT_REG(gctomove);
// a move destination was granted: re-aim the 'to' helper at the granted
// block (bound/base/top derived from the block index; first block has the
// special size BAMBOO_SMEM_SIZE_L)
3883 to->ptr = gcmovestartaddr;
3884 to->numblocks = gcblock2fill - 1;
3885 to->bound = (to->numblocks==0) ?
3886 BAMBOO_SMEM_SIZE_L :
3887 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3888 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3889 to->offset = to->ptr - to->base;
3890 to->top = (to->numblocks==0) ?
3891 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3893 to->offset = BAMBOO_CACHE_LINE_SIZE;
3894 to->ptr += to->offset; // for header
3895 to->top += to->offset;
3896 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3897 localcompact = true;
3899 localcompact = false;
3903 } else if(!finishcompact) {
3907 } // while(COMPACTPHASE == gcphase)
3911 #ifdef RAWPATH // TODO GC_DEBUG
3912 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3913 udn_tile_coord_y());
3918 #ifdef RAWPATH // TODO GC_DEBUG
3919 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3920 udn_tile_coord_y());
// ---- map phase: cores exchange old->new address mapping info ----
3928 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3929 // Note: only gc cores host objects and hence have mapping info to send
3931 for(i = 1; i < NUMCORES4GC; ++i) {
3932 // send start-mapinfo messages to all gc cores
3933 gccorestatus[i] = 1;
3934 send_msg_1(i, GCSTARTMAPINFO, false);
3939 #ifdef RAWPATH // TODO GC_DEBUG
3940 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3941 udn_tile_coord_y());
3945 #ifdef RAWPATH // TODO GC_DEBUG
3946 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3947 udn_tile_coord_y());
3949 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3950 while(MAPPHASE == gcphase) {
3951 // check the status of all cores
3952 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3953 if(gc_checkCoreStatus_I()) {
3954 // all cores have finished sending mapping info
3955 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3958 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3959 } // while(MAPPHASE == gcphase)
// ---- flush phase: every active core rewrites its stale pointers ----
3961 gcphase = FLUSHPHASE;
3962 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3963 // Note: all cores should flush their runtime data including non-gc
3965 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3966 // send start flush messages to all cores
3967 gccorestatus[i] = 1;
3968 send_msg_1(i, GCSTARTFLUSH, false);
3973 #ifdef RAWPATH // TODO GC_DEBUG
3974 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3975 udn_tile_coord_y());
3979 #ifdef GC_CACHE_ADAPT
3980 // now the master core needs to decide the new cache strategy
3981 cacheAdapt_master();
3982 #endif // GC_CACHE_ADAPT
3984 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3985 while(FLUSHPHASE == gcphase) {
3986 // check the status of all cores
3987 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3988 if(gc_checkAllCoreStatus_I()) {
3989 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3992 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3993 } // while(FLUSHPHASE == gcphase)
3994 #ifdef RAWPATH // TODO GC_DEBUG
3995 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3996 udn_tile_coord_y());
// ---- prefinish phase (cache adaption builds only) ----
3999 #ifdef GC_CACHE_ADAPT
4003 gcphase = PREFINISHPHASE;
4004 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4005 // Note: all cores should take part, including non-gc cores
4007 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
4008 // send start prefinish messages to all cores
4009 gccorestatus[i] = 1;
4010 send_msg_1(i, GCSTARTPREF, false);
4012 #ifdef RAWPATH // TODO GC_DEBUG
4013 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
4014 udn_tile_coord_y());
4016 // cache adapt phase
4017 cacheAdapt_mutator();
4018 #ifdef GC_CACHE_ADAPT_OUTPUT
4019 bamboo_output_cache_policy();
4021 cacheAdapt_gc(false);
4022 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4023 while(PREFINISHPHASE == gcphase) {
4024 // check the status of all cores
4025 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4026 if(gc_checkAllCoreStatus_I()) {
4027 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4030 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4031 } // while(PREFINISHPHASE == gcphase)
4032 #endif // GC_CACHE_ADAPT
// ---- finish phase ----
4034 gcphase = FINISHPHASE;
4036 // invalidate all shared mem pointers
4037 // put it here as it takes time to inform all the other cores to
4038 // finish gc and it might cause problem when some core resumes
4039 // mutator earlier than the other cores
4040 bamboo_cur_msp = NULL;
4041 bamboo_smem_size = 0;
4042 bamboo_smem_zero_top = NULL;
4044 gcprocessing = false;
4049 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4050 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
4051 // send gc finish messages to all cores
4052 send_msg_1(i, GCFINISH, false);
4053 gccorestatus[i] = 1;
4055 #ifdef RAWPATH // TODO GC_DEBUG
4056 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
4057 udn_tile_coord_y());
4060 } // void gc_master(struct garbagelist * stackptr)
// Top-level entry point for a collection cycle on this core.  Dispatches on
// the core id:
//   - core 0 (the coordinator) first verifies that every active core has
//     reached a GC safe point (resending GCSTARTPRE to stragglers and
//     checking that no object messages are still in flight), then runs
//     gc_master();
//   - cores < NUMCORES4GC run gc_collect();
//   - all remaining cores run gc_nocollect() and wait for GCFINISH.
// Each branch zeroes the first word of any leftover local shared-mem chunk
// before the init phase (so the shared heap is untouched during gcinit),
// optionally flushes the DTLB, and handles cache-sampling bookkeeping.
// Returns bool per the signature; the visible portion of this (elided)
// listing does not show the return statements.
4062 inline bool gc(struct garbagelist * stackptr) {
4065 gcprocessing = false;
4069 // core coordinator routine
4070 if(0 == BAMBOO_NUM_OF_CORE) {
4072 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
4073 udn_tile_coord_y());
4075 bool isallstall = true;
4076 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4077 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4079 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
4080 if(gccorestatus[ti] != 0) {
4086 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4087 // some of the cores are still executing the mutator and did not reach
4088 // some gc safe point, therefore it is not ready to do gc
4089 // in case that there are some pregc information msg lost, send a confirm
4090 // msg to the 'busy' core
4091 send_msg_1(ti, GCSTARTPRE, false);
4099 //BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
// record this core's own send/receive counters before summing them all
4100 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
4101 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
4104 BAMBOO_DEBUGPRINT(0xec04);
4106 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4107 sumsendobj += gcnumsendobjs[0][i];
4109 BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
4111 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4113 BAMBOO_DEBUGPRINT(0xec05);
4114 BAMBOO_DEBUGPRINT_REG(sumsendobj);
4116 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4117 sumsendobj -= gcnumreceiveobjs[0][i];
// FIX: was `gcnumreceiveobjs[i]` — gcnumreceiveobjs is 2-D (see [0][i] on
// the line above and [0][BAMBOO_NUM_OF_CORE] earlier), so the single
// subscript added a row pointer, not the per-core counter, to the debug tag.
4119 BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
4121 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4123 BAMBOO_DEBUGPRINT(0xec06);
4124 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// non-zero sum => object messages still in flight; not safe to start gc yet
4126 if(0 != sumsendobj) {
4127 // there were still some msgs on the fly, wait until there
4128 // are some update pregc information coming and check it again
4130 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4138 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4141 #ifdef RAWPATH // TODO GC_DEBUG
4142 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
4145 // Zero out the remaining bamboo_cur_msp
4146 // Only zero out the first 4 bytes of the remaining memory
4147 // Move the operation here because for the GC_CACHE_ADAPT version,
4148 // we need to make sure during the gcinit phase the shared heap is not
4149 // touched. Otherwise, there would be problem when adapt the cache
4151 if((bamboo_cur_msp != 0)
4152 && (bamboo_smem_zero_top == bamboo_cur_msp)
4153 && (bamboo_smem_size > 0)) {
4154 *((int *)bamboo_cur_msp) = 0;
4156 #ifdef GC_FLUSH_DTLB
4157 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4158 BAMBOO_CLEAN_DTLB();
4159 gc_num_flush_dtlb++;
4162 #ifdef GC_CACHE_ADAPT
4163 #ifdef GC_CACHE_SAMPLING
4164 // disable the timer interrupt
4165 bamboo_mask_timer_intr();
4166 // get the sampling data
4167 bamboo_output_dtlb_sampling();
4168 #endif // GC_CACHE_SAMPLING
4169 #endif // GC_CACHE_ADAPT
4170 gcprocessing = true;
4171 gc_master(stackptr);
// ---- gc cores other than the coordinator ----
4172 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
4173 // Zero out the remaining bamboo_cur_msp
4174 // Only zero out the first 4 bytes of the remaining memory
4175 // Move the operation here because for the GC_CACHE_ADAPT version,
4176 // we need to make sure during the gcinit phase the shared heap is not
4177 // touched. Otherwise, there would be problem when adapt the cache
4179 if((bamboo_cur_msp != 0)
4180 && (bamboo_smem_zero_top == bamboo_cur_msp)
4181 && (bamboo_smem_size > 0)) {
4182 *((int *)bamboo_cur_msp) = 0;
4184 #ifdef GC_FLUSH_DTLB
4185 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4186 BAMBOO_CLEAN_DTLB();
4187 gc_num_flush_dtlb++;
4190 #ifdef GC_CACHE_ADAPT
4191 #ifdef GC_CACHE_SAMPLING
4192 // disable the timer interrupt
4193 bamboo_mask_timer_intr();
4194 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4195 // get the sampling data
4196 bamboo_output_dtlb_sampling();
4198 #endif // GC_CACHE_SAMPLING
4199 #endif // GC_CACHE_ADAPT
4200 gcprocessing = true;
4201 gc_collect(stackptr);
4203 // invalidate all shared mem pointers
4204 bamboo_cur_msp = NULL;
4205 bamboo_smem_size = 0;
4206 bamboo_smem_zero_top = NULL;
4208 gcprocessing = false;
// ---- non-gc cores ----
4210 // Zero out the remaining bamboo_cur_msp
4211 // Only zero out the first 4 bytes of the remaining memory
4212 // Move the operation here because for the GC_CACHE_ADAPT version,
4213 // we need to make sure during the gcinit phase the shared heap is not
4214 // touched. Otherwise, there would be problem when adapt the cache
4216 if((bamboo_cur_msp != 0)
4217 && (bamboo_smem_zero_top == bamboo_cur_msp)
4218 && (bamboo_smem_size > 0)) {
4219 *((int *)bamboo_cur_msp) = 0;
4221 #ifdef GC_FLUSH_DTLB
4222 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4223 BAMBOO_CLEAN_DTLB();
4224 gc_num_flush_dtlb++;
4227 #ifdef GC_CACHE_ADAPT
4228 #ifdef GC_CACHE_SAMPLING
4229 // disable the timer interrupt
4230 bamboo_mask_timer_intr();
4231 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4232 // get the sampling data
4233 bamboo_output_dtlb_sampling();
4235 #endif // GC_CACHE_SAMPLING
4236 #endif // GC_CACHE_ADAPT
4237 // not a gc core, should wait for gcfinish msg
4238 gcprocessing = true;
4239 gc_nocollect(stackptr);
4241 // invalidate all shared mem pointers
4242 bamboo_cur_msp = NULL;
4243 bamboo_smem_size = 0;
4244 bamboo_smem_zero_top = NULL;
4246 gcprocessing = false;
// ---- common epilogue: reset cache-sampling state, re-arm timer ----
4248 #ifdef GC_CACHE_ADAPT
4249 #ifdef GC_CACHE_SAMPLING
4250 // reset the sampling arrays
4251 bamboo_dtlb_sampling_reset();
4252 #endif // GC_CACHE_SAMPLING
4253 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4254 // zero out the gccachesamplingtbl
4255 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4256 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4257 size_cachesamplingtbl_local_r);
4258 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
4259 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4262 #ifdef GC_CACHE_SAMPLING
4263 // enable the timer interrupt
4264 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4265 bamboo_unmask_timer_intr();
4266 #endif // GC_CACHE_SAMPLING
4267 #endif // GC_CACHE_ADAPT
4269 } // bool gc(struct garbagelist * stackptr)
// Begin a profiling record for a new GC cycle: allocate a GCInfo slot in
// gc_infoArray (unless the array already overflowed) and stamp time[0] with
// the current execution time.  Paired with gc_profileItem()/gc_profileEnd().
4272 inline void gc_profileStart(void) {
4273 if(!gc_infoOverflow) {
4274 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4275 gc_infoArray[gc_infoIndex] = gcInfo;
4277 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
// Record an intermediate timestamp (e.g. a phase boundary) into the current
// GC cycle's GCInfo record; no-op once the info array has overflowed.
4281 inline void gc_profileItem(void) {
4282 if(!gc_infoOverflow) {
4283 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4284 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
// Close the profiling record for the current GC cycle: append a final
// timestamp followed by seven counters (livespace, freespace, lobj,
// lobjspace, obj, liveobj, forwardobj) — gc_outputProfileData relies on
// exactly these 7 trailing slots (its "index - 7" offsets).  Sets
// gc_infoOverflow once GCINFOLENGTH records have been filled.
4288 inline void gc_profileEnd(void) {
4289 if(!gc_infoOverflow) {
4290 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4291 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4292 gcInfo->time[gcInfo->index++] = gc_num_livespace;
4293 gcInfo->time[gcInfo->index++] = gc_num_freespace;
4294 gcInfo->time[gcInfo->index++] = gc_num_lobj;
4295 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4296 gcInfo->time[gcInfo->index++] = gc_num_obj;
4297 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4298 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4300 if(gc_infoIndex == GCINFOLENGTH) {
4301 gc_infoOverflow = true;
4302 //taskInfoIndex = 0;
4307 // output the profiling data
// Dumps all recorded GCInfo entries.  Two conditionally-compiled variants
// are visible (the preprocessor seam between them is elided in this
// listing): a printf-based one, and a BAMBOO_DEBUGPRINT-based one that also
// decodes the 7 trailing counter slots written by gc_profileEnd().
// NOTE(review): the deltas printed per entry use time[j]-tmp with tmp
// starting at 0, so the first "delta" is the absolute timestamp.
4307b
4308 void gc_outputProfileData() {
4311 unsigned long long totalgc = 0;
4313 //printf("Start Time, End Time, Duration\n");
4314 // output per-cycle timing info
4315 for(i = 0; i < gc_infoIndex; i++) {
4316 GCInfo * gcInfo = gc_infoArray[i];
4317 unsigned long long tmp = 0;
4318 for(j = 0; j < gcInfo->index; j++) {
4319 printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
4320 tmp = gcInfo->time[j];
4322 tmp = (tmp-gcInfo->time[0]);
4323 printf(" ++ %lld \n", tmp);
4327 if(gc_infoOverflow) {
4328 printf("Caution: gc info overflow!\n");
4331 printf("\n\n total gc time: %lld \n", totalgc);
// ---- debug-print variant ----
4335 unsigned long long totalgc = 0;
4337 #ifndef BAMBOO_MEMPROF
4338 BAMBOO_DEBUGPRINT(0xdddd);
4340 // output per-cycle timing info
4341 for(i= 0; i < gc_infoIndex; i++) {
4342 GCInfo * gcInfo = gc_infoArray[i];
4343 #ifdef BAMBOO_MEMPROF
4344 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4346 unsigned long long tmp = 0;
4347 BAMBOO_DEBUGPRINT(0xddda);
// stop 7 short of index: the last 7 slots are counters, not timestamps
4348 for(j = 0; j < gcInfo->index - 7; j++) {
4349 BAMBOO_DEBUGPRINT(gcInfo->time[j]);
4350 BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
4351 BAMBOO_DEBUGPRINT(0xdddb);
4352 tmp = gcInfo->time[j];
4354 tmp = (tmp-gcInfo->time[0]);
4355 BAMBOO_DEBUGPRINT_REG(tmp);
4356 BAMBOO_DEBUGPRINT(0xdddc);
// the 7 counters recorded by gc_profileEnd, in order: livespace,
// freespace, lobj, lobjspace, obj, liveobj, forwardobj
4357 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]);
4358 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]);
4359 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]);
4360 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]);
4361 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]);
4362 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]);
4363 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]);
4364 BAMBOO_DEBUGPRINT(0xddde);
4368 #ifndef BAMBOO_MEMPROF
4369 BAMBOO_DEBUGPRINT(0xdddf);
4371 BAMBOO_DEBUGPRINT_REG(totalgc);
4373 if(gc_infoOverflow) {
4374 BAMBOO_DEBUGPRINT(0xefee);
4377 #ifndef BAMBOO_MEMPROF
4378 BAMBOO_DEBUGPRINT(0xeeee);
4382 #endif // #ifdef GC_PROFILE