3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9 #include "GCSharedHash.h"
12 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
13 extern int numqueues[][NUMCLASSES];
15 extern struct genhashtable * activetasks;
16 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
17 extern struct taskparamdescriptor *currtpd;
19 extern struct LockValue runtime_locks[MAXTASKPARAMS];
20 extern int runtime_locklen;
23 extern unsigned int gcmem_mixed_threshold;
24 extern unsigned int gcmem_mixed_usedmem;
29 struct pointerblock *next;
32 struct pointerblock *gchead=NULL;
34 struct pointerblock *gctail=NULL;
36 struct pointerblock *gctail2=NULL;
38 struct pointerblock *gcspare=NULL;
40 #define NUMLOBJPTRS 20
42 struct lobjpointerblock {
43 void * lobjs[NUMLOBJPTRS];
44 //void * dsts[NUMLOBJPTRS];
45 int lengths[NUMLOBJPTRS];
46 //void * origs[NUMLOBJPTRS];
47 int hosts[NUMLOBJPTRS];
48 struct lobjpointerblock *next;
49 struct lobjpointerblock *prev;
52 struct lobjpointerblock *gclobjhead=NULL;
53 int gclobjheadindex=0;
54 struct lobjpointerblock *gclobjtail=NULL;
55 int gclobjtailindex=0;
56 struct lobjpointerblock *gclobjtail2=NULL;
57 int gclobjtailindex2=0;
58 struct lobjpointerblock *gclobjspare=NULL;
61 typedef struct gc_cache_revise_info {
62 int orig_page_start_va;
68 int revised_sampling[NUMCORESACTIVE];
69 } gc_cache_revise_info_t;
70 gc_cache_revise_info_t gc_cache_revise_infomation;
71 #endif// GC_CACHE_ADAPT
74 // dump whole mem in blocks
// Debug helper: prints the whole shared memory region, 16 words (4*16 bytes)
// per row, first the reserved sblock-table area, then the GC-managed heap
// annotated with block/sblock numbers and the owning core's (x,y) coords.
// NOTE(review): this extract is missing several original lines (declarations
// of i/j/block/sblock/coren/x/y and some closing braces) -- read against the
// full file before editing.
75 inline void dumpSMem() {
83 printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
// Reserved region between BAMBOO_BASE_VA and gcbaseva holds the sblock table.
85 // reserved blocks for sblocktbl
86 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
88 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
89 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
90 udn_tile_coord_x(), udn_tile_coord_y(),
91 *((int *)(i)), *((int *)(i + 4)),
92 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
93 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
94 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
95 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
96 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
97 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
98 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
100 sblock = gcreservedsb;
101 bool advanceblock = false;
// Walk the heap; every (BAMBOO_SMEM_SIZE)/(4*16) rows is a block boundary.
// Blocks below BAMBOO_LARGE_SMEM_BOUND use the larger BAMBOO_SMEM_SIZE_L.
103 for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
104 advanceblock = false;
105 // computing sblock # and block #, core coordinate (x,y) also
106 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
108 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
109 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
// Map the block number to its owning core, then to tile coordinates.
121 coren = gc_block2core[block%(NUMCORES4GC*2)];
123 // compute core coordinate
124 BAMBOO_COORDS(coren, &x, &y);
125 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
126 udn_tile_coord_x(), udn_tile_coord_y(),
127 block, sblock++, x, y,
128 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
131 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
132 udn_tile_coord_x(), udn_tile_coord_y(),
133 *((int *)(i)), *((int *)(i + 4)),
134 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
135 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
136 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
137 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
138 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
139 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
140 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
142 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
146 // should be invoked with interruption closed
// Push ptr onto the mark queue (gchead/gcheadindex). When the current
// pointerblock is full, a new block is linked in -- reusing gcspare if one
// was cached by a previous dequeue, otherwise allocating with RUNMALLOC_I.
// The _I suffix marks routines that must run with interrupts disabled.
147 inline void gc_enqueue_I(void *ptr) {
149 BAMBOO_DEBUGPRINT(0xe601);
150 BAMBOO_DEBUGPRINT_REG(ptr);
152 if (gcheadindex==NUMPTRS) {
153 struct pointerblock * tmp;
// NOTE(review): lines taking tmp from gcspare (and resetting gcspare) are
// missing from this extract; only the allocation fallback is visible.
158 tmp=RUNMALLOC_I(sizeof(struct pointerblock));
159 } // if (gcspare!=NULL)
163 } // if (gcheadindex==NUMPTRS)
164 gchead->ptrs[gcheadindex++]=ptr;
166 BAMBOO_DEBUGPRINT(0xe602);
168 } // void gc_enqueue_I(void *ptr)
170 // dequeue and destroy the queue
// Pop the next pointer from the mark queue tail (gctail/gctailindex),
// consuming exhausted pointerblocks as it goes (destructive dequeue).
// Exhausted blocks are presumably recycled into gcspare -- the lines doing
// so (orig. 174-179) are missing from this extract; TODO confirm.
171 inline void * gc_dequeue_I() {
172 if (gctailindex==NUMPTRS) {
173 struct pointerblock *tmp=gctail;
180 } // if (gcspare!=NULL)
181 } // if (gctailindex==NUMPTRS)
182 return gctail->ptrs[gctailindex++];
183 } // void * gc_dequeue()
185 // dequeue and do not destroy the queue
// Non-destructive scan of the mark queue via the secondary cursor
// (gctail2/gctailindex2); advances to the next block without freeing.
186 inline void * gc_dequeue2_I() {
187 if (gctailindex2==NUMPTRS) {
188 struct pointerblock *tmp=gctail2;
189 gctail2=gctail2->next;
// NOTE(review): the reset of gctailindex2 to 0 (orig. line 190) is missing
// from this extract.
191 } // if (gctailindex2==NUMPTRS)
192 return gctail2->ptrs[gctailindex2++];
193 } // void * gc_dequeue2()
// Returns whether the destructive cursor (gctail/gctailindex) has not yet
// caught up with the enqueue head. The return statements (orig. 197-198)
// are missing from this extract.
195 inline int gc_moreItems_I() {
196 if ((gchead==gctail)&&(gctailindex==gcheadindex))
199 } // int gc_moreItems()
// Same emptiness test for the non-destructive cursor (gctail2/gctailindex2).
// The return statements (orig. 203-204) are missing from this extract.
201 inline int gc_moreItems2_I() {
202 if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
205 } // int gc_moreItems2()
207 // should be invoked with interruption closed
208 // enqueue a large obj: start addr & length
// Append a large-object record (start address, byte length, host core) to
// the doubly-linked large-object queue (gclobjhead/gclobjheadindex),
// growing it by one lobjpointerblock when the current one is full.
209 inline void gc_lobjenqueue_I(void *ptr,
213 BAMBOO_DEBUGPRINT(0xe901);
215 if (gclobjheadindex==NUMLOBJPTRS) {
216 struct lobjpointerblock * tmp;
217 if (gclobjspare!=NULL) {
// NOTE(review): the branch reusing gclobjspare (orig. 218-220) is missing
// from this extract; only the allocation fallback is visible.
221 tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
222 } // if (gclobjspare!=NULL)
// Link the new block at the head; prev pointers support reverse iteration
// (see gc_lobjdequeue3_I).
223 gclobjhead->next=tmp;
224 tmp->prev = gclobjhead;
227 } // if (gclobjheadindex==NUMLOBJPTRS)
228 gclobjhead->lobjs[gclobjheadindex]=ptr;
229 gclobjhead->lengths[gclobjheadindex]=length;
230 gclobjhead->hosts[gclobjheadindex++]=host;
232 BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
233 BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
234 BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
236 } // void gc_lobjenqueue_I(void *ptr...)
238 // dequeue and destroy the queue
// Destructive dequeue from the large-object queue: returns the object start
// address and writes its length and host core through the out-parameters.
239 inline void * gc_lobjdequeue_I(int * length,
241 if (gclobjtailindex==NUMLOBJPTRS) {
242 struct lobjpointerblock *tmp=gclobjtail;
243 gclobjtail=gclobjtail->next;
245 gclobjtail->prev = NULL;
246 if (gclobjspare!=NULL) {
// NOTE(review): recycling of the exhausted block into gclobjspare
// (orig. 247-251) is missing from this extract.
252 } // if (gclobjspare!=NULL)
253 } // if (gclobjtailindex==NUMLOBJPTRS)
255 *length = gclobjtail->lengths[gclobjtailindex];
258 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
260 return gclobjtail->lobjs[gclobjtailindex++];
261 } // void * gc_lobjdequeue()
// Emptiness test for the destructive large-object cursor. The return
// statements (orig. 265-266) are missing from this extract.
263 inline int gc_lobjmoreItems_I() {
264 if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
267 } // int gc_lobjmoreItems()
269 // dequeue and don't destroy the queue
// Advance the secondary (non-destructive) large-object cursor by one entry;
// unlike gc_lobjdequeue4_I this variant returns nothing. The index
// reset/increment lines (orig. 273-275) are missing from this extract.
270 inline void gc_lobjdequeue2_I() {
271 if (gclobjtailindex2==NUMLOBJPTRS) {
272 gclobjtail2=gclobjtail2->next;
276 } // if (gclobjtailindex2==NUMLOBJPTRS)
277 } // void * gc_lobjdequeue2()
// Emptiness test for the secondary cursor against the enqueue head.
// The return statements (orig. 281-282) are missing from this extract.
279 inline int gc_lobjmoreItems2_I() {
280 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
283 } // int gc_lobjmoreItems2()
285 // 'reversely' dequeue and don't destroy the queue
// Step the secondary cursor one entry BACKWARD (used after the ascending
// sort in cacheLObjs to walk large objects from high to low address).
// When the index underflows, follow the prev link to the previous block.
// The else branch decrementing the index (orig. 290-291) is missing here.
286 inline void gc_lobjdequeue3_I() {
287 if (gclobjtailindex2==0) {
288 gclobjtail2=gclobjtail2->prev;
289 gclobjtailindex2=NUMLOBJPTRS-1;
292 } // if (gclobjtailindex2==NUMLOBJPTRS)
293 } // void * gc_lobjdequeue3()
// Reverse-iteration termination test: true until the secondary cursor
// reaches the destructive tail. Return statements (orig. 297-298) missing.
295 inline int gc_lobjmoreItems3_I() {
296 if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
299 } // int gc_lobjmoreItems3()
301 inline void gc_lobjqueueinit4_I() {
302 gclobjtail2 = gclobjtail;
303 gclobjtailindex2 = gclobjtailindex;
304 } // void gc_lobjqueueinit2()
// Non-destructive forward dequeue (pass 4, after gc_lobjqueueinit4_I):
// returns the object's start address and writes length/host through the
// out-parameters without consuming queue blocks. The index reset on block
// rollover (orig. 310) is missing from this extract.
306 inline void * gc_lobjdequeue4_I(int * length,
308 if (gclobjtailindex2==NUMLOBJPTRS) {
309 gclobjtail2=gclobjtail2->next;
311 } // if (gclobjtailindex==NUMLOBJPTRS)
313 *length = gclobjtail2->lengths[gclobjtailindex2];
316 *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
318 return gclobjtail2->lobjs[gclobjtailindex2++];
319 } // void * gc_lobjdequeue4()
// Termination test for pass 4 against the enqueue head. Return statements
// (orig. 323-324) are missing from this extract.
321 inline int gc_lobjmoreItems4_I() {
322 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
325 } // int gc_lobjmoreItems4()
327 INTPTR gccurr_heapbound = 0;
// Read an object's type id from its first word and compute its byte size:
// plain objects use the classsize table; array objects (type >= NUMCLASSES,
// in the visible else branch) add the element payload to the header size.
// NOTE(review): the out-parameter writes and function close (orig. 344-346)
// are missing from this extract; `size` is presumably copied to *tsize.
329 inline void gettype_size(void * ptr,
332 int type = ((int *)ptr)[0];
334 if(type < NUMCLASSES) {
336 size = classsize[type];
339 struct ArrayObject *ao=(struct ArrayObject *)ptr;
340 int elementsize=classsize[type];
341 int length=ao->___length___;
342 size=sizeof(struct ArrayObject)+length*elementsize;
343 } // if(type < NUMCLASSES)
// Classify ptr as a "large object": one that either starts exactly on a
// block boundary or does not fit in the remainder of its block. Also
// returns the object's type and size via ttype/tsize (from gettype_size).
// Blocks below BAMBOO_LARGE_SMEM_BOUND use size BAMBOO_SMEM_SIZE_L.
// NOTE(review): the return statements are missing from this extract; the
// debug prints (1 / 0) suggest true for both boundary cases, else false.
348 inline bool isLarge(void * ptr,
352 BAMBOO_DEBUGPRINT(0xe701);
353 BAMBOO_DEBUGPRINT_REG(ptr);
355 // check if a pointer is referring to a large object
356 gettype_size(ptr, ttype, tsize);
358 BAMBOO_DEBUGPRINT(*tsize);
360 int bound = (BAMBOO_SMEM_SIZE);
361 if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
362 bound = (BAMBOO_SMEM_SIZE_L);
364 if((((int)ptr-gcbaseva)%(bound))==0) {
365 // ptr is a start of a block
367 BAMBOO_DEBUGPRINT(0xe702);
368 BAMBOO_DEBUGPRINT(1);
372 if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
373 // it crosses the boundary of the current block
375 BAMBOO_DEBUGPRINT(0xe703);
376 BAMBOO_DEBUGPRINT(1);
381 BAMBOO_DEBUGPRINT(0);
384 } // bool isLarge(void * ptr, int * ttype, int * tsize)
// Return the core number that owns ptr's shared-memory block, as computed
// by the RESIDECORE macro. The declaration of `host` and the return
// statement (orig. 388, 394-395) are missing from this extract.
386 inline int hostcore(void * ptr) {
387 // check the host core of ptr
389 RESIDECORE(ptr, &host);
391 BAMBOO_DEBUGPRINT(0xedd0);
392 BAMBOO_DEBUGPRINT_REG(ptr);
393 BAMBOO_DEBUGPRINT_REG(host);
396 } // int hostcore(void * ptr)
// Translate a core number into tile (x,y) coordinates via the flat
// bamboo_cpu2coords table (x at even index, y at odd). The remaining
// parameter declarations (orig. 399-400, presumably int *x, int *y) are
// missing from this extract.
398 inline void cpu2coords(int coren,
401 *x = bamboo_cpu2coords[2*coren];
402 *y = bamboo_cpu2coords[2*coren+1];
403 } // void cpu2coords(...)
405 inline bool isLocal(void * ptr) {
406 // check if a pointer is in shared heap on this core
407 return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
408 } // bool isLocal(void * ptr)
// True when every GC core (0..NUMCORES4GC-1) reports stalled status
// (gccorestatus[i] == 0). The loop body that clears allStall/breaks and
// the final return (orig. 414-415, 418-420) are missing from this extract.
410 inline bool gc_checkCoreStatus_I() {
411 bool allStall = true;
412 for(int i = 0; i < NUMCORES4GC; ++i) {
413 if(gccorestatus[i] != 0) {
416 } // if(gccorestatus[i] != 0)
417 } // for(i = 0; i < NUMCORES4GC; ++i)
// Same stall check as gc_checkCoreStatus_I but over ALL active cores
// (0..NUMCORESACTIVE-1), not just the GC cores. The clearing branch and
// final return (orig. 425-426, 429-430) are missing from this extract.
421 inline bool gc_checkAllCoreStatus_I() {
422 bool allStall = true;
423 for(int i = 0; i < NUMCORESACTIVE; ++i) {
424 if(gccorestatus[i] != 0) {
427 } // if(gccorestatus[i] != 0)
428 } // for(i = 0; i < NUMCORESACTIVE; ++i)
// Termination detection for the mark phase, run on the coordinator core.
// Uses two generations of per-core send/receive counters
// (gcnumsendobjs/gcnumreceiveobjs[0|1], selected by gcnumsrobjs_index):
// mark ends only when all cores stall AND sent == received AND the two
// generations agree (no in-flight marking traffic). On success the phase
// advances to COMPACTPHASE.
// NOTE(review): function name keeps the original typo ("Statue" for
// "Status") -- callers elsewhere depend on it, so it is not renamed here.
432 inline void checkMarkStatue() {
434 BAMBOO_DEBUGPRINT(0xee01);
438 (waitconfirm && (numconfirm == 0))) {
440 BAMBOO_DEBUGPRINT(0xee02);
// When responding to a confirm round, record this core's counters into the
// generation NOT currently being compared; otherwise into the current one.
445 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
448 entry_index = gcnumsrobjs_index;
450 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
451 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
452 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
453 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
454 // check the status of all cores
455 bool allStall = gc_checkAllCoreStatus_I();
457 BAMBOO_DEBUGPRINT(0xee03);
461 BAMBOO_DEBUGPRINT(0xee04);
466 BAMBOO_DEBUGPRINT(0xee05);
468 // the first time found all cores stall
469 // send out status confirm msg to all other cores
470 // reset the corestatus array too
471 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
473 numconfirm = NUMCORESACTIVE - 1;
474 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
475 for(i = 1; i < NUMCORESACTIVE; ++i) {
477 // send mark phase finish confirm request msg to core i
478 send_msg_1(i, GCMARKCONFIRM, false);
479 } // for(i = 1; i < NUMCORESACTIVE; ++i)
482 // check if the sum of send objs and receive obj are the same
483 // yes->check if the info is the latest; no->go on executing
485 for(i = 0; i < NUMCORESACTIVE; ++i) {
486 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
487 } // for(i = 0; i < NUMCORESACTIVE; ++i)
489 BAMBOO_DEBUGPRINT(0xee06);
490 BAMBOO_DEBUGPRINT_REG(sumsendobj);
492 for(i = 0; i < NUMCORESACTIVE; ++i) {
493 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
494 } // for(i = 0; i < NUMCORESACTIVE; ++i)
496 BAMBOO_DEBUGPRINT(0xee07);
497 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// sumsendobj == 0 means every sent object was received; now verify the
// two counter generations match so the snapshot is actually stable.
499 if(0 == sumsendobj) {
500 // Check if there are changes of the numsendobjs or numreceiveobjs on
502 bool ischanged = false;
503 for(i = 0; i < NUMCORESACTIVE; ++i) {
504 if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
505 (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
509 } // for(i = 0; i < NUMCORESACTIVE; ++i)
511 BAMBOO_DEBUGPRINT(0xee08);
512 BAMBOO_DEBUGPRINT_REG(ischanged);
516 BAMBOO_DEBUGPRINT(0xee09);
518 // all the core status info are the latest
520 gcphase = COMPACTPHASE;
521 // restore the gcstatus for all cores
522 for(i = 0; i < NUMCORESACTIVE; ++i) {
524 } // for(i = 0; i < NUMCORESACTIVE; ++i)
// Snapshot was unstable: flip the generation index and retry next round.
527 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
530 // There were changes between phase 1 and phase 2, can not decide
531 // whether the mark phase has been finished
533 // As it fails in phase 2, flip the entries
534 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
535 } // if(0 == sumsendobj) else ...
536 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
537 } // if(!gcwaitconfirm) else()
539 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
541 } // if((!waitconfirm)...
543 BAMBOO_DEBUGPRINT(0xee0a);
545 } // void checkMarkStatue()
// Pre-collection barrier: confirm no object-transfer messages are still
// in flight before starting GC. Broadcasts STATUSCONFIRM, waits for all
// replies, then checks that total sent == total received across active
// cores. Return statements are missing from this extract; presumably true
// when safe to start and false otherwise -- TODO confirm in full file.
547 inline bool preGC() {
548 // preparation for gc
549 // make sure to clear all incoming msgs especially transfer obj msgs
551 BAMBOO_DEBUGPRINT(0xec01);
555 (waitconfirm && (numconfirm == 0))) {
556 // send out status confirm msgs to all cores to check if there are
557 // transfer obj msgs on-the-fly
559 numconfirm = NUMCORESACTIVE - 1;
560 for(i = 1; i < NUMCORESACTIVE; ++i) {
562 // send status confirm msg to core i
563 send_msg_1(i, STATUSCONFIRM, false);
564 } // for(i = 1; i < NUMCORESACTIVE; ++i)
567 BAMBOO_DEBUGPRINT(0xec02);
// Busy-wait until every core has answered the confirm request.
570 if(numconfirm == 0) {
573 } // wait for confirmations
577 BAMBOO_DEBUGPRINT(0xec03);
579 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
580 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
583 BAMBOO_DEBUGPRINT(0xec04);
585 for(i = 0; i < NUMCORESACTIVE; ++i) {
586 sumsendobj += numsendobjs[i];
588 BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
590 } // for(i = 1; i < NUMCORESACTIVE; ++i)
592 BAMBOO_DEBUGPRINT(0xec05);
593 BAMBOO_DEBUGPRINT_REG(sumsendobj);
595 for(i = 0; i < NUMCORESACTIVE; ++i) {
596 sumsendobj -= numreceiveobjs[i];
598 BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
600 } // for(i = 1; i < NUMCORESACTIVE; ++i)
602 BAMBOO_DEBUGPRINT(0xec06);
603 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// Zero difference => no transfer messages in flight; GC may begin.
605 if(0 == sumsendobj) {
608 // still have some transfer obj msgs on-the-fly, can not start gc
610 } // if(0 == sumsendobj)
613 BAMBOO_DEBUGPRINT(0xec07);
615 // previously asked for status confirmation and do not have all the
616 // confirmations yet, can not start gc
618 } // if((!waitconfirm) ||
// Reset all per-collection state before a GC cycle: on the startup core,
// clear every core's status and object counters plus the per-GC-core
// request/fill tables; on every core, reset local counters, rebuild or
// rewind the mark and large-object queues, reset the pointer-mapping and
// forwarding hash tables, and zero the profiling accumulators.
621 inline void initGC() {
623 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
624 for(i = 0; i < NUMCORES4GC; ++i) {
626 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
627 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
629 gcrequiredmems[i] = 0;
630 gcfilledblocks[i] = 0;
632 } // for(i = 0; i < NUMCORES4GC; ++i)
// Non-GC cores still have their counters tracked.
633 for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
635 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
636 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
641 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
642 gcself_numsendobjs = 0;
643 gcself_numreceiveobjs = 0;
644 gcmarkedptrbound = 0;
647 //gcismapped = false;
// Mark queue: allocate on first GC, otherwise rewind cursors to the head.
// NOTE(review): the if/else structure around these lines is partly missing
// from this extract.
658 gcheadindex=gctailindex=gctailindex2 = 0;
659 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
661 gctailindex = gctailindex2 = gcheadindex;
662 gctail = gctail2 = gchead;
665 // initialize the large obj queues
666 if (gclobjhead==NULL) {
669 gclobjtailindex2 = 0;
670 gclobjhead=gclobjtail=gclobjtail2=
671 RUNMALLOC(sizeof(struct lobjpointerblock));
673 gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
674 gclobjtail = gclobjtail2 = gclobjhead;
676 gclobjhead->next = gclobjhead->prev = NULL;
678 #ifdef LOCALHASHTBL_TEST
679 freeRuntimeHash(gcpointertbl);
680 gcpointertbl = allocateRuntimeHash(20);
682 mgchashreset(gcpointertbl);
684 //gcpointertbl = allocateMGCHash(20);
686 freeMGCHash(gcforwardobjtbl);
687 gcforwardobjtbl = allocateMGCHash(20, 3);
689 // initialize the mapping info related structures
690 if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
691 // Never free the shared hash table, just reset it
692 /*freeGCSharedHash(gcsharedptbl);
693 gcsharedptbl = allocateGCSharedHash(20);*/
694 mgcsharedhashReset(gcsharedptbl);
696 // Zero out the remaining bamboo_cur_msp
697 // Only zero out the first 4 bytes of the remaining memory
698 /*if((bamboo_cur_msp != 0)
699 && (bamboo_smem_zero_top == bamboo_cur_msp)
700 && (bamboo_smem_size > 0)) {
701 *((int *)bamboo_cur_msp) = 0;
704 gc_num_livespace = 0;
705 gc_num_freespace = 0;
707 gc_num_lobjspace = 0;
709 gc_num_forwardobj = 0;
710 gc_num_profiles = NUMCORESACTIVE - 1;
714 // compute load balance for all cores
// Sum all cores' live-data loads to find the post-compaction heap top,
// then derive the number of blocks per core (numbpc) and the core that
// owns the top block (gctopcore). Returns numbpc -- the trailing return
// (orig. 742-743) is missing from this extract; TODO confirm.
715 inline int loadbalance(int * heaptop) {
716 // compute load balance
719 // get the total loads
720 int tloads = gcloads[STARTUPCORE];
721 for(i = 1; i < NUMCORES4GC; i++) {
722 tloads += gcloads[i];
724 *heaptop = gcbaseva + tloads;
727 BAMBOO_DEBUGPRINT(0xdddd);
728 BAMBOO_DEBUGPRINT_REG(tloads);
729 BAMBOO_DEBUGPRINT_REG(*heaptop);
732 BLOCKINDEX(*heaptop, &b);
733 int numbpc = b / NUMCORES4GC; // num of blocks per core
735 BAMBOO_DEBUGPRINT_REG(b);
736 BAMBOO_DEBUGPRINT_REG(numbpc);
// NOTE(review): BLOCKINDEX above receives *heaptop but RESIDECORE below
// receives the pointer `heaptop` itself -- verify this asymmetry is
// intentional inside the RESIDECORE macro.
739 RESIDECORE(heaptop, &gctopcore);
741 BAMBOO_DEBUGPRINT_REG(gctopcore);
744 } // void loadbalance(int * heaptop)
// Stash all queued large objects at the very top of the shared heap so the
// compactor can reclaim their original locations. Two passes over the
// large-object queue: (1) forward pass sums sizes and insertion-sorts the
// queue entries by ascending address; (2) reverse pass (dequeue3) copies
// each object downward from the heap top. Returns false when the live heap
// already overlaps the needed top-of-heap region. Several lines (sumsize
// accumulation, early returns, dst/index decrements) are missing from this
// extract.
746 inline bool cacheLObjs() {
747 // check the total mem size need for large objs
748 unsigned long long sumsize = 0;
751 BAMBOO_DEBUGPRINT(0xe801);
753 gclobjtail2 = gclobjtail;
754 gclobjtailindex2 = gclobjtailindex;
758 // compute total mem size required and sort the lobjs in ascending order
759 while(gc_lobjmoreItems2_I()) {
761 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
762 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
763 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
769 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
770 BAMBOO_DEBUGPRINT_REG(tmp_len);
771 BAMBOO_DEBUGPRINT_REG(sumsize);
// Insertion sort: shift larger entries up, following prev links across
// block boundaries, until tmp_lobj's slot is found.
773 int i = gclobjtailindex2-1;
774 struct lobjpointerblock * tmp_block = gclobjtail2;
775 // find the place to insert
778 if(tmp_block->prev == NULL) {
781 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
782 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
783 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
784 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
785 tmp_block = tmp_block->prev;
789 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
791 if(tmp_block->lobjs[i-1] > tmp_lobj) {
792 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
793 tmp_block->lengths[i] = tmp_block->lengths[i-1];
794 tmp_block->hosts[i] = tmp_block->hosts[i-1];
798 } // if(tmp_block->lobjs[i-1] < tmp_lobj)
799 } // if(i ==0 ) else {}
802 if(i != gclobjtailindex2 - 1) {
803 tmp_block->lobjs[i] = tmp_lobj;
804 tmp_block->lengths[i] = tmp_len;
805 tmp_block->hosts[i] = tmp_host;
807 } // while(gc_lobjmoreItems2())
810 gc_num_lobjspace = sumsize;
812 // check if there are enough space to cache these large objs
813 INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
814 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
815 // do not have enough room to cache large objs
817 BAMBOO_DEBUGPRINT(0xe802);
818 BAMBOO_DEBUGPRINT_REG(dst);
819 BAMBOO_DEBUGPRINT_REG(gcheaptop);
820 BAMBOO_DEBUGPRINT_REG(sumsize);
825 BAMBOO_DEBUGPRINT(0xe803);
826 BAMBOO_DEBUGPRINT_REG(dst);
827 BAMBOO_DEBUGPRINT_REG(gcheaptop);
830 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
831 // cache the largeObjs to the top of the shared heap
832 //gclobjtail2 = gclobjtail;
833 //gclobjtailindex2 = gclobjtailindex;
834 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
835 while(gc_lobjmoreItems3_I()) {
837 size = gclobjtail2->lengths[gclobjtailindex2];
838 // set the mark field to COMPACTED, indicating that this obj has been
839 // moved and needs to be flushed
840 ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
// memmove when source and destination may overlap, memcpy otherwise.
842 if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
843 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
845 //BAMBOO_WRITE_HINT_CACHE(dst, size);
846 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
849 BAMBOO_DEBUGPRINT(0x804);
850 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
851 BAMBOO_DEBUGPRINT(dst);
852 BAMBOO_DEBUGPRINT_REG(size);
853 BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
854 BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
858 } // void cacheLObjs()
860 // update the bamboo_smemtbl to record current shared mem usage
// Recompute bamboo_smemtbl for the blocks owned by core `coren`, given
// that the core's compaction top is `localtop`: fully-used blocks below
// ltopcore get their full size, the top block gets the partial `load`,
// and (in the mixed-memory accounting mode) gcmem_mixed_usedmem is summed.
// FIX: "BLOCKINDEX(localtop, <opcore);" was an HTML-decoding mangle of
// "&ltopcore" ("&lt" -> "<"); restored the address-of expression so the
// ltopcore variable used below is actually computed.
// NOTE(review): parts of this function (declarations of ltopcore/i/j/toset,
// loop headers, closing braces) are missing from this extract.
861 void updateSmemTbl(int coren,
864 int bound = BAMBOO_SMEM_SIZE_L;
865 BLOCKINDEX(localtop, &ltopcore);
866 if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
867 bound = BAMBOO_SMEM_SIZE;
869 int load = (localtop-gcbaseva)%bound;
// Walk this core's block sequence via the gc_core2block interleaving.
874 toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
875 if(toset < ltopcore) {
876 bamboo_smemtbl[toset]=
877 (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
879 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
881 } else if(toset == ltopcore) {
882 bamboo_smemtbl[toset] = load;
884 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
896 } // void updateSmemTbl(int, int)
// After compaction, relocate the large objects that cacheLObjs stashed at
// the heap top back down to just above the live data: recompute each
// core's true heap top from gcloads/gcfilledblocks, rebuild bamboo_smemtbl
// and gcsbstarttbl, copy each large object into place (writing block
// headers and -2 padding), record old->new address mappings in
// gcpointertbl, notify remote host cores via GCLOBJMAPPING, and finally
// recompute live/free space totals and the first non-full block.
// NOTE(review): many original lines are missing from this extract
// (variable declarations, else branches, several closing braces) -- treat
// this body as a partial view and consult the full file before editing.
898 inline void moveLObjs() {
900 BAMBOO_DEBUGPRINT(0xea01);
903 // update the gcmem_mixed_usedmem
904 gcmem_mixed_usedmem = 0;
906 // zero out the smemtbl
907 BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
908 // find current heap top
909 // flush all gcloads to indicate the real heap top on one core
910 // previous it represents the next available ptr on a core
911 if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
912 && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
913 // edge of a block, check if this is exactly the heaptop
914 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
915 gcloads[0]+=(gcfilledblocks[0]>1 ?
916 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
918 updateSmemTbl(0, gcloads[0]);
920 BAMBOO_DEBUGPRINT(0xea02);
921 BAMBOO_DEBUGPRINT_REG(gcloads[0]);
922 BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
// Same normalization for the remaining GC cores.
924 for(int i = 1; i < NUMCORES4GC; i++) {
927 BAMBOO_DEBUGPRINT(0xf000+i);
928 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
929 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
931 if((gcfilledblocks[i] > 0)
932 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
933 // edge of a block, check if this is exactly the heaptop
934 BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
936 (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
939 updateSmemTbl(i, gcloads[i]);
941 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
943 } // for(int i = 1; i < NUMCORES4GC; i++) {
945 // find current heap top
947 // a bug here: when using local allocation, directly move large objects
948 // to the highest free chunk might not be memory efficient
// Scan blocks from the top down for the highest one holding live data.
953 for(i = gcnumblock-1; i >= 0; i--) {
954 if(bamboo_smemtbl[i] > 0) {
959 tmpheaptop = gcbaseva;
961 tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
962 (BAMBOO_SMEM_SIZE_L*i) :
963 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
966 // move large objs from gcheaptop to tmpheaptop
967 // write the header first
968 unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -gcheaptop;
970 gcmem_mixed_usedmem += tomove;
973 BAMBOO_DEBUGPRINT(0xea03);
974 BAMBOO_DEBUGPRINT_REG(tomove);
975 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
976 BAMBOO_DEBUGPRINT_REG(gcheaptop);
978 // flush the sbstartbl
979 BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
980 (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
982 gcheaptop = tmpheaptop;
984 // check how many blocks it acrosses
985 int remain = tmpheaptop-gcbaseva;
986 int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb;//number of the sblock
987 int b = 0; // number of the block
988 BLOCKINDEX(tmpheaptop, &b);
989 // check the remaining space in this block
990 bound = (BAMBOO_SMEM_SIZE);
991 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
992 bound = (BAMBOO_SMEM_SIZE_L);
994 remain = bound - remain%bound;
997 BAMBOO_DEBUGPRINT(0xea04);
// Reserve a cache-line-sized slot for the block header before the data.
1003 int base = tmpheaptop;
1005 remain -= BAMBOO_CACHE_LINE_SIZE;
1006 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1007 gc_lobjqueueinit4_I();
1008 while(gc_lobjmoreItems4_I()) {
1009 ptr = (int)(gc_lobjdequeue4_I(&size, &host));
1010 ALIGNSIZE(size, &isize);
1011 if(remain < isize) {
1012 // this object acrosses blocks
1014 // close current block, fill its header
1015 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1016 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1017 bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;//add the size of header
1021 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1022 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1024 remain -= BAMBOO_CACHE_LINE_SIZE;
1025 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1026 BLOCKINDEX(tmpheaptop, &b);
1027 sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
1028 } // if(cpysize > 0)
1030 // move the large obj
// memmove when the cached copy overlaps the destination, else memcpy.
1031 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1032 memmove(tmpheaptop, gcheaptop, size);
1034 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1035 memcpy(tmpheaptop, gcheaptop, size);
1037 // fill the remaining space with -2 padding
1038 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1040 BAMBOO_DEBUGPRINT(0xea05);
1041 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1042 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1043 BAMBOO_DEBUGPRINT_REG(size);
1044 BAMBOO_DEBUGPRINT_REG(isize);
1045 BAMBOO_DEBUGPRINT_REG(base);
1048 // cache the mapping info anyway
1049 //if(ptr != tmpheaptop) {
1050 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1051 #ifdef LOCALHASHTBL_TEST
1052 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1054 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1056 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1057 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1060 BAMBOO_DEBUGPRINT(0xcdca);
1061 BAMBOO_DEBUGPRINT_REG(ptr);
1062 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1064 if(host != BAMBOO_NUM_OF_CORE) {
1065 // send the original host core with the mapping info
1066 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1068 BAMBOO_DEBUGPRINT(0xcdcb);
1069 BAMBOO_DEBUGPRINT_REG(ptr);
1070 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1072 } // if(host != BAMBOO_NUM_OF_CORE)
1073 tmpheaptop += isize;
1075 // set the gcsbstarttbl and bamboo_smemtbl
// Interior sblocks spanned by this object are flagged -1 (no obj start).
1076 int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
1077 for(int k = 1; k < tmpsbs; k++) {
1078 gcsbstarttbl[sb+k] = (INTPTR)(-1);
1081 bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1082 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
1083 for(; b < tmpsbs; b++) {
1084 bamboo_smemtbl[b] = bound;
1085 if(b==NUMCORES4GC-1) {
1086 bound = BAMBOO_SMEM_SIZE;
1089 if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
1090 gcsbstarttbl[sb] = (INTPTR)(-1);
1091 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1092 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1093 bamboo_smemtbl[b] = bound;
1095 gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
1096 remain = tmpheaptop-gcbaseva;
1097 bamboo_smemtbl[b] = remain%bound;
1098 remain = bound - bamboo_smemtbl[b];
1099 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
1101 // close current block and fill the header
1102 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1103 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
1106 if(remain == BAMBOO_CACHE_LINE_SIZE) {
1107 // fill with 0 in case
1108 BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
1110 remain -= BAMBOO_CACHE_LINE_SIZE;
1111 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1114 // move the large obj
1115 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1116 memmove(tmpheaptop, gcheaptop, size);
1118 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1119 memcpy(tmpheaptop, gcheaptop, size);
1121 // fill the remaining space with -2 padding
1122 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1124 BAMBOO_DEBUGPRINT(0xea06);
1125 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1126 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1127 BAMBOO_DEBUGPRINT_REG(size);
1128 BAMBOO_DEBUGPRINT_REG(isize);
1133 // cache the mapping info anyway
1134 //if(ptr != tmpheaptop) {
1135 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1136 #ifdef LOCALHASHTBL_TEST
1137 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1139 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1141 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1142 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1145 BAMBOO_DEBUGPRINT(0xcdcc);
1146 BAMBOO_DEBUGPRINT_REG(ptr);
1147 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1148 BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
1150 if(host != BAMBOO_NUM_OF_CORE) {
1151 // send the original host core with the mapping info
1152 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1154 BAMBOO_DEBUGPRINT(0xcdcd);
1155 BAMBOO_DEBUGPRINT_REG(ptr);
1156 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1158 } // if(host != BAMBOO_NUM_OF_CORE)
1159 tmpheaptop += isize;
1161 // update bamboo_smemtbl
1162 bamboo_smemtbl[b] += isize;
1163 } // if(remain < isize) else ...
1164 } // while(gc_lobjmoreItems())
1166 // close current block, fill the header
1167 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1168 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1169 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;// add the size of the header
1171 tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
1173 gcheaptop = tmpheaptop;
1175 } // if(tomove == 0)
1178 BAMBOO_DEBUGPRINT(0xea07);
1179 BAMBOO_DEBUGPRINT_REG(gcheaptop);
// Locate the first block that still has free space for the allocator.
1182 bamboo_free_block = 0;
1185 tbound = (bamboo_free_block<NUMCORES4GC) ?
1186 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1187 if(bamboo_smemtbl[bamboo_free_block] == tbound) {
1188 bamboo_free_block++;
1190 // the first non-full partition
1196 // check how many live space there are
1197 gc_num_livespace = 0;
1198 for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
1199 gc_num_livespace += bamboo_smemtbl[tmpi];
1201 gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
1204 BAMBOO_DEBUGPRINT(0xea08);
1205 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1207 } // void moveLObjs()
// Mark one object reference during the mark phase:
//  - NULL: ignore.
//  - shared obj hosted on THIS core: if its mark word (word 6) is INIT,
//    flag it DISCOVERED, flush the cache line, and enqueue it for scanning.
//  - shared obj hosted on another core: forward a GCMARKEDOBJ message to
//    the host (at most once per object, tracked in gcforwardobjtbl).
//  - non-shared obj: enqueued directly (visible at orig. 1244-1246;
//    presumably the else branch of the ISSHAREDOBJ test -- TODO confirm).
1209 inline void markObj(void * objptr) {
1210 if(objptr == NULL) {
1213 if(ISSHAREDOBJ(objptr)) {
1214 int host = hostcore(objptr);
1215 if(BAMBOO_NUM_OF_CORE == host) {
1217 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1218 if(((int *)objptr)[6] == INIT) {
1219 // this is the first time that this object is discovered,
1220 // set the flag as DISCOVERED
1221 ((int *)objptr)[6] |= DISCOVERED;
1222 BAMBOO_CACHE_FLUSH_LINE(objptr);
1223 gc_enqueue_I(objptr);
1225 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1228 BAMBOO_DEBUGPRINT(0xbbbb);
1229 BAMBOO_DEBUGPRINT_REG(host);
1230 BAMBOO_DEBUGPRINT_REG(objptr);
1232 // check if this obj has been forwarded
1233 if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
1234 // send a msg to host informing that objptr is active
1235 send_msg_2(host, GCMARKEDOBJ, objptr, /*BAMBOO_NUM_OF_CORE,*/ false);
1237 gc_num_forwardobj++;
1238 #endif // GC_PROFILE
1239 gcself_numsendobjs++;
1240 MGCHashadd(gcforwardobjtbl, (int)objptr);
1244 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1245 gc_enqueue_I(objptr);
1246 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1247 } // if(ISSHAREDOBJ(objptr))
1248 } // void markObj(void * objptr)
1250 // enqueue root objs
// Seed the mark queue with every GC root reachable from this core:
// the mutator stack (garbagelist chain), the per-class object queues,
// the current task descriptor, pending active tasks, cached incoming and
// outgoing transfer objects, and lock-table values. Aborts via BAMBOO_EXIT
// if called outside MARKPHASE.
1251 inline void tomark(struct garbagelist * stackptr) {
1252 if(MARKPHASE != gcphase) {
1254 BAMBOO_DEBUGPRINT_REG(gcphase);
1256 BAMBOO_EXIT(0xb101);
1258 gcbusystatus = true;
1262 // enqueue current stack
1263 while(stackptr!=NULL) {
1265 BAMBOO_DEBUGPRINT(0xe501);
1266 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1267 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1268 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1270 for(i=0; i<stackptr->size; i++) {
1271 if(stackptr->array[i] != NULL) {
1272 markObj(stackptr->array[i]);
1275 stackptr=stackptr->next;
1279 BAMBOO_DEBUGPRINT(0xe503);
1281 // enqueue objectsets
1282 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
1283 for(i=0; i<NUMCLASSES; i++) {
1284 struct parameterwrapper ** queues =
1285 objectqueues[BAMBOO_NUM_OF_CORE][i];
1286 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1287 for(j = 0; j < length; ++j) {
1288 struct parameterwrapper * parameter = queues[j];
1289 struct ObjectHash * set=parameter->objectset;
1290 struct ObjectNode * ptr=set->listhead;
// NOTE(review): the loop advancing ptr through the hash list
// (orig. 1291, 1293-1298) is missing from this extract.
1292 markObj((void *)ptr->key);
1299 // enqueue current task descriptor
1300 if(currtpd != NULL) {
1302 BAMBOO_DEBUGPRINT(0xe504);
1304 for(i=0; i<currtpd->numParameters; i++) {
1305 markObj(currtpd->parameterArray[i]);
1310 BAMBOO_DEBUGPRINT(0xe505);
1312 // enqueue active tasks
1313 if(activetasks != NULL) {
1314 struct genpointerlist * ptr=activetasks->list;
1316 struct taskparamdescriptor *tpd=ptr->src;
1318 for(i=0; i<tpd->numParameters; i++) {
1319 markObj(tpd->parameterArray[i]);
1326 BAMBOO_DEBUGPRINT(0xe506);
1328 // enqueue cached transferred obj
1329 struct QueueItem * tmpobjptr = getHead(&objqueue);
1330 while(tmpobjptr != NULL) {
1331 struct transObjInfo * objInfo =
1332 (struct transObjInfo *)(tmpobjptr->objectptr);
1333 markObj(objInfo->objptr);
1334 tmpobjptr = getNextQueueItem(tmpobjptr);
1338 BAMBOO_DEBUGPRINT(0xe507);
1340 // enqueue cached objs to be transferred
1341 struct QueueItem * item = getHead(totransobjqueue);
1342 while(item != NULL) {
1343 struct transObjInfo * totransobj =
1344 (struct transObjInfo *)(item->objectptr);
1345 markObj(totransobj->objptr);
1346 item = getNextQueueItem(item);
1347 } // while(item != NULL)
1350 BAMBOO_DEBUGPRINT(0xe508);
1352 // enqueue lock related info
1353 for(i = 0; i < runtime_locklen; ++i) {
1354 markObj((void *)(runtime_locks[i].redirectlock));
1355 if(runtime_locks[i].value != NULL) {
1356 markObj((void *)(runtime_locks[i].value));
1360 } // void tomark(struct garbagelist * stackptr)
// Per-core mark-phase worker loop.  Enqueues roots (via tomark on the first
// entry), then repeatedly dequeues pointers: large shared objects are
// diverted to the large-object queue (gc_lobjenqueue_I), unmarked local
// objects are marked in place and their aligned size accumulated into
// gccurr_heaptop, and each object's child pointers are scanned through the
// class pointer map (pointerarray).  When the local queue drains, the core
// publishes its mark-finish status (directly if it is STARTUPCORE, else via
// a GCFINISHMARK message) and keeps looping until the coordinator leaves
// MARKPHASE.
// NOTE(review): ((int *)ptr)[6] is used as the object's GC flag word
// (DISCOVERED/MARKED bits) -- confirm against the object header layout.
1362 inline void mark(bool isfirst,
1363 struct garbagelist * stackptr) {
1365 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
1369 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
1371 // enqueue root objs
1373 gccurr_heaptop = 0; // record the size of all active objs in this core
1374 // aligned but does not consider block boundaries
1375 gcmarkedptrbound = 0;
1378 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);
1381 bool checkfield = true;
1382 bool sendStall = false;
1384 while(MARKPHASE == gcphase) {
1386 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
// queue inspection must happen in runtime (interrupt-safe) mode
1389 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1390 bool hasItems = gc_moreItems2_I();
1391 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1393 BAMBOO_DEBUGPRINT(0xed05);
1399 gcbusystatus = true;
1401 void * ptr = gc_dequeue2_I();
1404 BAMBOO_DEBUGPRINT_REG(ptr);
1409 // check if it is a shared obj
1410 if(ISSHAREDOBJ(ptr)) {
1411 // a shared obj, check if it is a local obj on this core
1412 int host = hostcore(ptr);
1413 bool islocal = (host == BAMBOO_NUM_OF_CORE);
// "not marked" here means the DISCOVERED bit is still set (enqueued,
// not yet processed)
1415 bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
1416 if(isLarge(ptr, &type, &size) && isnotmarked) {
1417 // ptr is a large object and not marked or enqueued
1419 BAMBOO_DEBUGPRINT(0xecec);
1420 BAMBOO_DEBUGPRINT_REG(ptr);
1421 BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
1423 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1424 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1426 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// clear DISCOVERED, set MARKED ('&' binds tighter than '|')
1428 ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
1429 BAMBOO_CACHE_FLUSH_LINE(ptr);
1430 } else if(isnotmarked) {
1431 // ptr is an unmarked active object on this core
1432 ALIGNSIZE(size, &isize);
1433 gccurr_heaptop += isize;
1435 BAMBOO_DEBUGPRINT(0xaaaa);
1436 BAMBOO_DEBUGPRINT_REG(ptr);
1437 BAMBOO_DEBUGPRINT_REG(isize);
1438 BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1441 ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
1442 BAMBOO_CACHE_FLUSH_LINE(ptr);
// track the highest address holding marked data on this core
1444 if(ptr + size > gcmarkedptrbound) {
1445 gcmarkedptrbound = ptr + size;
1446 } // if(ptr + size > gcmarkedptrbound)
1448 // ptr is not an active obj or has been marked
1450 } // if(isLarge(ptr, &type, &size)) else ...
1451 } /* can never reach here
1454 if(BAMBOO_NUM_OF_CORE == 0) {
1455 BAMBOO_DEBUGPRINT(0xbbbb);
1456 BAMBOO_DEBUGPRINT_REG(host);
1457 BAMBOO_DEBUGPRINT_REG(ptr);
1460 // check if this obj has been forwarded
1461 if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
1462 // send a msg to host informing that ptr is active
1463 send_msg_2(host, GCMARKEDOBJ, ptr, false);
1464 gcself_numsendobjs++;
1465 MGCHashadd(gcforwardobjtbl, (int)ptr);
1468 }// if(isLocal(ptr)) else ...*/
1469 } // if(ISSHAREDOBJ(ptr))
1471 BAMBOO_DEBUGPRINT(0xed06);
1475 // scan all pointers in ptr
// pointerarray[type]: 0 => primitive array (no children); 1 => array of
// pointers; otherwise a table whose [0] holds the field count and
// [1..size] the byte offsets of the pointer fields
1476 unsigned INTPTR * pointer;
1477 pointer=pointerarray[type];
1479 /* Array of primitives */
1481 } else if (((INTPTR)pointer)==1) {
1482 /* Array of pointers */
1483 struct ArrayObject *ao=(struct ArrayObject *) ptr;
1484 int length=ao->___length___;
1486 for(j=0; j<length; j++) {
1488 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1492 INTPTR size=pointer[0];
1494 for(i=1; i<=size; i++) {
1495 unsigned int offset=pointer[i];
1496 void * objptr=*((void **)(((char *)ptr)+offset));
1499 } // if (pointer==0) else if ... else ...
1501 } // while(gc_moreItems2())
1503 BAMBOO_DEBUGPRINT(0xed07);
1505 gcbusystatus = false;
1506 // send mark finish msg to core coordinator
1507 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1509 BAMBOO_DEBUGPRINT(0xed08);
// coordinator records its own counters directly
1511 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1512 gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
1513 gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
1514 gcself_numreceiveobjs;
1515 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1519 BAMBOO_DEBUGPRINT(0xed09);
1521 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1522 gcself_numsendobjs, gcself_numreceiveobjs, false);
1525 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1527 BAMBOO_DEBUGPRINT(0xed0a);
1530 if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1532 BAMBOO_DEBUGPRINT(0xed0b);
1536 } // while(MARKPHASE == gcphase)
// Helper for compact2Heaptop(): grant core `coren`'s pending memory request
// out of the space left at the current heap-top core (gctopcore).  Must run
// with interrupts disabled (the _I suffix convention in this runtime).
// If the request plus one cache line fits in *remain, it is fully granted;
// otherwise the rest of the current block is consumed, the request is
// reduced and left pending, and gctopcore advances to the next top core.
// p/numblocks/remain are in-out cursors shared with the caller's loop.
1541 inline void compact2Heaptophelper_I(int coren,
// a request is padded by one cache line for the destination block header
1546 int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1547 if(STARTUPCORE == coren) {
// the coordinator serves itself: record the grant in the globals instead
// of sending a GCMOVESTART message
1549 gcmovestartaddr = *p;
1550 gcdstcore = gctopcore;
1551 gcblock2fill = *numblocks + 1;
1553 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
1556 BAMBOO_DEBUGPRINT_REG(coren);
1557 BAMBOO_DEBUGPRINT_REG(gctopcore);
1558 BAMBOO_DEBUGPRINT_REG(*p);
1559 BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1561 if(memneed < *remain) {
1563 BAMBOO_DEBUGPRINT(0xd104);
// request fully satisfied from the current block
1566 gcrequiredmems[coren] = 0;
1567 gcloads[gctopcore] += memneed;
1568 *remain = *remain - memneed;
1571 BAMBOO_DEBUGPRINT(0xd105);
1573 // next available block
1575 gcfilledblocks[gctopcore] += 1;
1577 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1578 gcloads[gctopcore] = newbase;
// only (*remain - cacheline) bytes were usable for payload
1579 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1580 gcstopblock[gctopcore]++;
1581 gctopcore = NEXTTOPCORE(gctopblock);
1583 *numblocks = gcstopblock[gctopcore];
1584 *p = gcloads[gctopcore];
// remaining space up to the next block boundary (L blocks for the first
// NUMCORES4GC blocks, regular-size blocks after)
1586 *remain=(b<NUMCORES4GC) ?
1587 ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1588 : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1590 BAMBOO_DEBUGPRINT(0xd106);
1591 BAMBOO_DEBUGPRINT_REG(gctopcore);
1592 BAMBOO_DEBUGPRINT_REG(*p);
1593 BAMBOO_DEBUGPRINT_REG(b);
1594 BAMBOO_DEBUGPRINT_REG(*remain);
1596 } // if(memneed < remain)
1598 } // void compact2Heaptophelper_I(int, int*, int*, int*)
// When no core has spare memory and some cores are blocked with pending
// move requests, locate the current heap top and direct the blocked cores
// to move their remaining data there.  Serves the top core's own pending
// work first, then walks every core with an outstanding request, stopping
// early if the current top core becomes busy again.
1600 inline void compact2Heaptop() {
1601 // no cores with spare mem and some cores are blocked with pending move
1602 // find the current heap top and make them move to the heap top
1604 int numblocks = gcfilledblocks[gctopcore];
1605 //BASEPTR(gctopcore, numblocks, &p);
1606 p = gcloads[gctopcore];
// space left in gctopcore's current block (L-sized vs regular blocks)
1609 int remain = (b<NUMCORES4GC) ?
1610 ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1611 : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1612 // check if the top core finishes
1613 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1614 if(gccorestatus[gctopcore] != 0) {
1616 BAMBOO_DEBUGPRINT(0xd101);
1617 BAMBOO_DEBUGPRINT_REG(gctopcore);
1619 // let the top core finishes its own work first
1620 compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
1621 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1624 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1627 BAMBOO_DEBUGPRINT(0xd102);
1628 BAMBOO_DEBUGPRINT_REG(gctopcore);
1629 BAMBOO_DEBUGPRINT_REG(p);
1630 BAMBOO_DEBUGPRINT_REG(b);
1631 BAMBOO_DEBUGPRINT_REG(remain);
// serve every still-working core that is waiting on memory
1633 for(int i = 0; i < NUMCORES4GC; i++) {
1634 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1635 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1637 BAMBOO_DEBUGPRINT(0xd103);
1639 compact2Heaptophelper_I(i, &p, &numblocks, &remain);
1640 if(gccorestatus[gctopcore] != 0) {
1642 BAMBOO_DEBUGPRINT(0xd101);
1643 BAMBOO_DEBUGPRINT_REG(gctopcore);
1645 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1646 // the top core is not free now
1649 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1650 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1651 } // for(i = 0; i < NUMCORES4GC; i++)
1653 BAMBOO_DEBUGPRINT(0xd106);
1655 } // void compact2Heaptop()
// Pair cores that still hold spare blocks (sourcecore) with cores whose
// compaction is blocked on a pending memory request (dstcore), granting
// memory via assignSpareMem_I and dispatching GCMOVESTART so the blocked
// core can resume.  If no core is still running and none is blocked, the
// phase advances to SUBTLECOMPACTPHASE; if cores are blocked but nothing
// has spare memory, the caller falls through to compact2Heaptop().
1657 inline void resolvePendingMoveRequest() {
1659 BAMBOO_DEBUGPRINT(0xeb01);
1662 BAMBOO_DEBUGPRINT(0xeeee);
1663 for(int k = 0; k < NUMCORES4GC; k++) {
1664 BAMBOO_DEBUGPRINT(0xf000+k);
1665 BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1666 BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1667 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1668 BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1670 BAMBOO_DEBUGPRINT(0xffff);
1674 bool nosparemem = true;
1675 bool haspending = false;
1676 bool hasrunning = false;
1677 bool noblock = false;
1678 int dstcore = 0; // the core who need spare mem
1679 int sourcecore = 0; // the core who has spare mem
// two independent scans over the cores: i looks for spare memory,
// j looks for pending move requests
1680 for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
1682 // check if there are cores with spare mem
1683 if(gccorestatus[i] == 0) {
1684 // finished working, check if it still have spare mem
1685 if(gcfilledblocks[i] < gcstopblock[i]) {
1686 // still have spare mem
1689 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1694 if(gccorestatus[j] != 0) {
1695 // not finished, check if it has pending move requests
1696 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1701 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1702 } // if(gccorestatus[i] == 0) else ...
1704 } // if(!haspending)
1705 if(!nosparemem && haspending) {
// found a (sourcecore, dstcore) match: grant the memory under
// interrupt-disabled runtime mode
1709 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1710 gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
1711 gcrequiredmems[dstcore],
1714 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1716 BAMBOO_DEBUGPRINT(0xeb02);
1717 BAMBOO_DEBUGPRINT_REG(sourcecore);
1718 BAMBOO_DEBUGPRINT_REG(dstcore);
1719 BAMBOO_DEBUGPRINT_REG(startaddr);
1720 BAMBOO_DEBUGPRINT_REG(tomove);
1722 if(STARTUPCORE == dstcore) {
1724 BAMBOO_DEBUGPRINT(0xeb03);
// the coordinator records the grant locally instead of messaging itself
1726 gcdstcore = sourcecore;
1728 gcmovestartaddr = startaddr;
1729 gcblock2fill = tomove;
1732 BAMBOO_DEBUGPRINT(0xeb04);
1734 send_msg_4(dstcore, GCMOVESTART, sourcecore,
1735 startaddr, tomove, false);
1742 } // for(i = 0; i < NUMCORES4GC; i++)
1744 BAMBOO_DEBUGPRINT(0xcccc);
1745 BAMBOO_DEBUGPRINT_REG(hasrunning);
1746 BAMBOO_DEBUGPRINT_REG(haspending);
1747 BAMBOO_DEBUGPRINT_REG(noblock);
// everyone finished and nobody is blocked: move to the next GC phase
1750 if(!hasrunning && !noblock) {
1751 gcphase = SUBTLECOMPACTPHASE;
1755 } // void resolvePendingMoveRequest()
// Cursor state shared by the compaction routines below (nextSBlock,
// initOrig_Dst, nextBlock, moveobj, compacthelper): one instance tracks the
// scan source ("orig"), another the copy destination ("to").
1758 int numblocks; // block num for heap
1759 INTPTR base; // base virtual address of current heap block
1760 INTPTR ptr; // virtual address of current heap top
1761 int offset; // offset in current heap block
1762 int blockbase; // virtual address of current small block to check
1763 int blockbound; // bound virtual address of current small block
1764 int sblockindex; // index of the small blocks
1765 int top; // real size of current heap block to check
1766 int bound; // bound size of current heap block to check
1767 }; // struct moveHelper
1769 // If out of boundary of valid shared memory, return false, else return true
// Advance orig's cursor to the next small block (sblock) containing data to
// scan: skips exhausted heap blocks, blocks recorded empty in bamboo_smemtbl,
// and sblocks reserved for large objects (gcsbstarttbl[i] == -1).  On
// success, orig->blockbase/blockbound/offset/ptr describe the new sblock.
1770 inline bool nextSBlock(struct moveHelper * orig) {
1771 orig->blockbase = orig->blockbound;
1772 bool sbchanged = false;
1774 BAMBOO_DEBUGPRINT(0xecc0);
1775 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1776 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1777 BAMBOO_DEBUGPRINT_REG(orig->bound);
1778 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1781 // check if across a big block
1782 // TODO now do not zero out the whole memory, maybe the last two conditions
// a leading zero word means no (more) data was allocated in this region
1784 if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
1785 || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
1786 || ((*((int*)orig->blockbase))==0)) {
1788 // end of current heap block, jump to next one
1791 BAMBOO_DEBUGPRINT(0xecc1);
1792 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1794 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1796 BAMBOO_DEBUGPRINT(orig->base);
// ran past the end of the shared heap: nothing left to scan
1798 if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
1800 orig->ptr = orig->base; // set current ptr to out of boundary too
1803 //orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1804 orig->blockbase = orig->base;
1805 orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
1808 BLOCKINDEX(orig->base, &blocknum);
1809 if(bamboo_smemtbl[blocknum] == 0) {
// this heap block holds no data at all: try the next one
1811 goto innernextSBlock;
1813 // check the bamboo_smemtbl to decide the real bound
1814 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1815 } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
// crossed into the next sblock of the same heap block
1816 orig->sblockindex += 1;
1818 } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1820 // check if this sblock should be skipped or have special start point
1821 if(gcsbstarttbl[orig->sblockindex] == -1) {
// sblock is occupied by a large object: skip it entirely
1824 BAMBOO_DEBUGPRINT(0xecc2);
1826 orig->sblockindex += 1;
1827 orig->blockbase += BAMBOO_SMEM_SIZE;
1828 goto outernextSBlock;
1829 } else if((gcsbstarttbl[orig->sblockindex] != 0)
1831 // the first time to access this SBlock
1833 BAMBOO_DEBUGPRINT(0xecc3);
1835 // not start from the very beginning
1836 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1837 } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1839 // setup information for this sblock
// the first word of an sblock stores its used length; skip the
// cache-line-sized header to reach the first object
1840 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1841 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1842 orig->ptr = orig->blockbase + orig->offset;
1844 BAMBOO_DEBUGPRINT(0xecc4);
1845 BAMBOO_DEBUGPRINT_REG(orig->base);
1846 BAMBOO_DEBUGPRINT_REG(orig->bound);
1847 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1848 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1849 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1850 BAMBOO_DEBUGPRINT_REG(orig->offset);
1852 if(orig->ptr >= orig->bound) {
1853 // met a lobj, move to next block
1854 goto innernextSBlock;
1858 } // bool nextSBlock(struct moveHelper * orig)
1860 // return false if there are no available data to compact
// Initialize the two compaction cursors for this core: `to` points at the
// start of this core's first heap block (destination, reserving a cache
// line for the block header) and `orig` at the first sblock with data to
// scan.  Under GC_CACHE_ADAPT also primes the page-sampling bookkeeping
// for the destination pages.
1861 inline bool initOrig_Dst(struct moveHelper * orig,
1862 struct moveHelper * to) {
1865 to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1866 to->bound = BAMBOO_SMEM_SIZE_L;
1867 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1870 BAMBOO_DEBUGPRINT(0xef01);
1871 BAMBOO_DEBUGPRINT_REG(to->base);
1873 to->ptr = to->base + to->offset;
1874 #ifdef GC_CACHE_ADAPT
1875 // initialize the gc_cache_revise_information
1876 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1877 gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
1878 ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
1879 gc_cache_revise_infomation.to_page_index =
1880 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
// -1 marks "no orig page started yet"
1881 gc_cache_revise_infomation.orig_page_start_va = -1;
1882 #endif // GC_CACHE_ADAPT
1884 // init the orig ptr
// compaction is in-place per core: scanning starts at the same base the
// destination writes to
1885 orig->numblocks = 0;
1886 orig->base = to->base;
1888 BLOCKINDEX(orig->base, &blocknum);
1889 // check the bamboo_smemtbl to decide the real bound
1890 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1891 orig->blockbase = orig->base;
1892 orig->sblockindex = (orig->base - gcbaseva) / BAMBOO_SMEM_SIZE;
1894 BAMBOO_DEBUGPRINT(0xef02);
1895 BAMBOO_DEBUGPRINT_REG(orig->base);
1896 BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1897 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1898 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
// first sblock may be skipped (large object) or start mid-block
1901 if(gcsbstarttbl[orig->sblockindex] == -1) {
1903 BAMBOO_DEBUGPRINT(0xef03);
1907 gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1908 return nextSBlock(orig);
1909 } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1911 BAMBOO_DEBUGPRINT(0xef04);
1913 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1916 BAMBOO_DEBUGPRINT(0xef05);
// first word of the sblock holds its used length
1918 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1919 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1920 orig->ptr = orig->blockbase + orig->offset;
1922 BAMBOO_DEBUGPRINT(0xef06);
1923 BAMBOO_DEBUGPRINT_REG(orig->base);
1927 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
// Advance the destination cursor `to` to the start of its next heap block,
// reserving BAMBOO_CACHE_LINE_SIZE at the front of the block for the header.
// NOTE(review): the increment of to->numblocks occurs on a line elided from
// this view (between the bound update and the BASEPTR call) -- confirm.
1929 inline void nextBlock(struct moveHelper * to) {
1930 to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1931 to->bound += BAMBOO_SMEM_SIZE;
1933 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1934 to->offset = BAMBOO_CACHE_LINE_SIZE;
1935 to->ptr = to->base + to->offset;
1936 } // void nextBlock(struct moveHelper * to)
1938 // endaddr does not contain spaces for headers
// Move the single object at orig->ptr toward the compaction destination.
// Steps: skip -2 filler words; (GC_CACHE_ADAPT) roll the page-sampling
// bookkeeping forward at orig/to page boundaries; decode the object's
// type and size (scalar via classsize[], array via element size * length);
// if the MARKED bit of the header word ((int*)ptr)[6] is set, copy the
// object to to->ptr -- advancing `to` to the next block first when it does
// not fit -- stamp the old header COMPACTED, pad the aligned remainder with
// -2, and record the old->new mapping in gcpointertbl (and gcsharedptbl if
// present).  Returns true when the destination has consumed `stopblock`
// blocks or the source is exhausted, false to continue.
1939 inline bool moveobj(struct moveHelper * orig,
1940 struct moveHelper * to,
1942 if(stopblock == 0) {
1947 BAMBOO_DEBUGPRINT(0xe201);
1948 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 BAMBOO_DEBUGPRINT_REG(to->ptr);
// -2-valued words are alignment padding written after moved objects; skip
1957 while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1958 orig->ptr = (int*)(orig->ptr) + 1;
1960 #ifdef GC_CACHE_ADAPT
1961 if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
1962 // end of an orig page
1963 // compute the impact of this page for the new page
1965 ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
1966 // /((float)(BAMBOO_PAGE_SIZE));
// fold this orig page's per-core access samples, scaled by how much of
// the destination page it produced, into the revised sampling table
1967 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
1968 ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
1969 gc_cache_revise_infomation.to_page_index] += (int)(
1970 ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
1971 gc_cache_revise_infomation.orig_page_index]*tmp_factor/((float)(BAMBOO_PAGE_SIZE)));
1973 /*VA tmp_va = (int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index);
1975 BLOCKINDEX(tmp_va, &block);
1976 int coren = gc_block2core[block%(NUMCORES4GC*2)];
1977 if((((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
1978 gc_cache_revise_infomation.orig_page_index] != 0)
1980 tprintf("++ %x(%d) %d %d(%x, %x, %x, %x)\n", tmp_va, gc_cache_revise_infomation.orig_page_index, tt, ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)gccachesamplingtbl, (int)gccachesamplingtbl_r, (int)&((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)&((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[gc_cache_revise_infomation.to_page_index]);
1983 // prepare for an new orig page
1984 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
1985 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
1986 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
1987 gc_cache_revise_infomation.orig_page_index =
1988 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
1989 gc_cache_revise_infomation.to_page_start_va = to->ptr;
// source cursor ran off its current sblock: fetch the next one
1992 if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
1993 if(!nextSBlock(orig)) {
1994 // finished, no more data
2000 BAMBOO_DEBUGPRINT(0xe202);
2001 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2002 BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
2004 // check the obj's type, size and mark flag
2005 type = ((int *)(orig->ptr))[0];
2008 // end of this block, go to next one
2009 if(!nextSBlock(orig)) {
2010 // finished, no more data
2014 } else if(type < NUMCLASSES) {
// scalar object: fixed size from the class table
2016 size = classsize[type];
2019 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
2020 int elementsize=classsize[type];
2021 int length=ao->___length___;
2022 size=sizeof(struct ArrayObject)+length*elementsize;
2024 mark = ((int *)(orig->ptr))[6];
2025 bool isremote = ((((int *)(orig->ptr))[6] & REMOTEM) != 0);
2027 BAMBOO_DEBUGPRINT(0xe203);
2028 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2029 BAMBOO_DEBUGPRINT_REG(size);
2031 ALIGNSIZE(size, &isize); // no matter is the obj marked or not
2032 // should be able to across it
2033 if((mark & MARKED) != 0) {
2035 BAMBOO_DEBUGPRINT(0xe204);
2040 // marked obj, copy it to current heap top
2041 // check to see if remaining space is enough
2042 if(to->top + isize > to->bound) {
2043 // fill 0 indicating the end of this block
2044 BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top);
2045 // fill the header of this block and then go to next block
2046 to->offset += to->bound - to->top;
2047 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2048 (*((int*)(to->base))) = to->offset;
2049 #ifdef GC_CACHE_ADAPT
2050 int tmp_ptr = to->ptr;
2051 #endif // GC_CACHE_ADAPT
2053 #ifdef GC_CACHE_ADAPT
2054 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2055 // end of an to page, wrap up its information
2057 ((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va));
2058 // /((float)(BAMBOO_PAGE_SIZE));
2059 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2060 ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
2061 gc_cache_revise_infomation.to_page_index] += (int)(
2062 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2063 gc_cache_revise_infomation.orig_page_index]*tmp_factor/((float)(BAMBOO_PAGE_SIZE)));
2065 /*VA tmp_va = (int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index);
2067 BLOCKINDEX(tmp_va, &block);
2068 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2069 if((((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2070 gc_cache_revise_infomation.orig_page_index] != 0)
2072 tprintf("-- %x(%d) %d %d(%x, %x, %x, %x)\n", tmp_va, gc_cache_revise_infomation.orig_page_index, tt, ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)gccachesamplingtbl, (int)gccachesamplingtbl_r, (int)&((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)&((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[gc_cache_revise_infomation.to_page_index]);
2075 // prepare for an new to page
2076 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2077 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2078 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2079 gc_cache_revise_infomation.orig_page_index =
2080 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2081 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2082 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2083 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2084 gc_cache_revise_infomation.to_page_index =
2085 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2087 #endif // GC_CACHE_ADAPT
2088 if(stopblock == to->numblocks) {
2089 // already fulfilled the block
2091 } // if(stopblock == to->numblocks)
2092 } // if(to->top + isize > to->bound)
2093 // set the mark field to 2, indicating that this obj has been moved
2094 // and need to be flushed
2095 ((int *)(orig->ptr))[6] = COMPACTED;
2096 if(to->ptr != orig->ptr) {
// source and destination ranges can overlap when sliding objects
// downward within the same region: use memmove there, memcpy otherwise
2097 if((int)(orig->ptr) < (int)(to->ptr)+size) {
2098 memmove(to->ptr, orig->ptr, size);
2100 //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
2101 memcpy(to->ptr, orig->ptr, size);
2103 // fill the remaining space with -2
2104 BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
2106 // store mapping info
2107 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2108 #ifdef LOCALHASHTBL_TEST
2109 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2111 mgchashInsert_I(gcpointertbl, orig->ptr, to->ptr);
2113 //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2115 // add to the sharedptbl
2116 if(gcsharedptbl != NULL) {
2117 //GCSharedHashadd_I(gcsharedptbl, orig->ptr, to->ptr);
2118 mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
2121 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2124 BAMBOO_DEBUGPRINT(0xcdce);
2125 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2126 BAMBOO_DEBUGPRINT_REG(to->ptr);
2127 BAMBOO_DEBUGPRINT_REG(isize);
// one aligned object's worth of live data has been placed
2129 gccurr_heaptop -= isize;
2131 to->offset += isize;
2133 #ifdef GC_CACHE_ADAPT
2134 int tmp_ptr = to->ptr;
2135 #endif // GC_CACHE_ADAPT
2136 if(to->top == to->bound) {
2137 // fill the header of this block and then go to next block
2138 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2139 (*((int*)(to->base))) = to->offset;
2142 #ifdef GC_CACHE_ADAPT
2143 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2144 // end of an to page, wrap up its information
2146 ((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va));
2147 // /((float)(BAMBOO_PAGE_SIZE));
2148 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2149 ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
2150 gc_cache_revise_infomation.to_page_index] += (int)(
2151 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2152 gc_cache_revise_infomation.orig_page_index]*tmp_factor/((float)(BAMBOO_PAGE_SIZE)));
2154 /*VA tmp_va = (int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index);
2156 BLOCKINDEX(tmp_va, &block);
2157 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2158 if((((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2159 gc_cache_revise_infomation.orig_page_index] != 0)
2161 tprintf("== %x(%d) %d %d(%x, %x, %x, %x)\n", tmp_va, gc_cache_revise_infomation.orig_page_index, tt, ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)gccachesamplingtbl, (int)gccachesamplingtbl_r, (int)&((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)&((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[gc_cache_revise_infomation.to_page_index]);
2164 // prepare for an new to page
2165 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2166 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2167 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2168 gc_cache_revise_infomation.orig_page_index =
2169 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2170 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2171 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2172 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2173 gc_cache_revise_infomation.to_page_index =
2174 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2176 #endif // GC_CACHE_ADAPT
2179 BAMBOO_DEBUGPRINT(0xe205);
2185 BAMBOO_DEBUGPRINT_REG(isize);
2186 BAMBOO_DEBUGPRINT_REG(size);
2187 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2188 BAMBOO_DEBUGPRINT_REG(orig->bound);
2190 if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
2192 BAMBOO_DEBUGPRINT(0xe206);
2194 if(!nextSBlock(orig)) {
2195 // finished, no more data
2200 BAMBOO_DEBUGPRINT(0xe207);
2201 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2204 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
2206 // should be invoked with interrupt closed
// Grant `requiredmem` bytes out of sourcecore's spare space.  Writes the
// grant's start address (*startaddr) and the number of blocks the requester
// may fill (*tomove).  Returns 0 if fully satisfied from the current block;
// otherwise consumes the rest of the block and returns the outstanding
// byte count.
// NOTE(review): the shortfall here is `requiredmem-remain`, while
// compact2Heaptophelper_I reduces the pending request by
// `remain - BAMBOO_CACHE_LINE_SIZE` (i.e. memneed-remain) -- the two
// disagree by one cache line; confirm which accounting is intended.
2207 inline int assignSpareMem_I(int sourcecore,
2212 BLOCKINDEX(gcloads[sourcecore], &b);
// end address of sourcecore's current block (L-sized blocks come first)
2213 int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
2214 : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2215 int remain = boundptr - gcloads[sourcecore];
2216 int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2217 *startaddr = gcloads[sourcecore];
2218 *tomove = gcfilledblocks[sourcecore] + 1;
2219 if(memneed < remain) {
2220 gcloads[sourcecore] += memneed;
2223 // next available block
2224 gcfilledblocks[sourcecore] += 1;
2226 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2227 gcloads[sourcecore] = newbase;
2228 return requiredmem-remain;
2230 } // int assignSpareMem_I(int ,int * , int * , int * )
2232 // should be invoked with interrupt closed
// Search all GC cores for one that has finished and still has unfilled
// blocks, and assign the requested memory from it via assignSpareMem_I.
// If none qualifies right now, park the request in
// gcrequiredmems[requiredcore] so resolvePendingMoveRequest /
// compact2Heaptop can satisfy it later.
2233 inline bool gcfindSpareMem_I(int * startaddr,
2238 for(int k = 0; k < NUMCORES4GC; k++) {
2239 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2240 // check if this stopped core has enough mem
2241 assignSpareMem_I(k, requiredmem, tomove, startaddr);
2246 // if can not find spare mem right now, hold the request
2247 gcrequiredmems[requiredcore] = requiredmem;
2250 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
// Drive compaction on this core: repeatedly moveobj() until all marked data
// (up to gcmarkedptrbound) is moved or the assigned blocks (gcblock2fill)
// are full; seal the last destination block's header, publish the filled
// block count and heap top, then either record/report completion (directly
// on STARTUPCORE, otherwise via GCFINISHCOMPACT) or -- when data remains --
// wait for a GCMOVESTART grant and re-target `to` at the granted region,
// possibly on another core (*localcompact is updated accordingly).
2252 inline bool compacthelper(struct moveHelper * orig,
2253 struct moveHelper * to,
2256 bool * localcompact) {
2257 // scan over all objs in this block, compact the marked objs
2258 // loop stop when finishing either scanning all active objs or
2259 // fulfilled the gcstopblock
2261 BAMBOO_DEBUGPRINT(0xe101);
2262 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2263 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2266 while(orig->ptr < gcmarkedptrbound) {
2267 bool stop = moveobj(orig, to, gcblock2fill);
2272 #ifdef GC_CACHE_ADAPT
2273 // end of an to page, wrap up its information
// fold the final partial page's samples into the revised sampling table
2275 ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
2276 // /((float)(BAMBOO_PAGE_SIZE));
2277 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2278 ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
2279 gc_cache_revise_infomation.to_page_index] += (int)(
2280 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2281 gc_cache_revise_infomation.orig_page_index]*tmp_factor/((float)(BAMBOO_PAGE_SIZE)));
2283 /*VA tmp_va = (int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index);
2285 BLOCKINDEX(tmp_va, &block);
2286 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2287 if((((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
2288 gc_cache_revise_infomation.orig_page_index] != 0)
2290 tprintf("** %x(%d) %d %d(%x, %x, %x, %x)\n", tmp_va, gc_cache_revise_infomation.orig_page_index, tt, ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)gccachesamplingtbl, (int)gccachesamplingtbl_r, (int)&((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index], (int)&((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[gc_cache_revise_infomation.to_page_index]);
2293 #endif // GC_CACHE_ADAPT
2294 // if no objs have been compact, do nothing,
2295 // otherwise, fill the header of this block
2296 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2297 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2298 (*((int*)(to->base))) = to->offset;
// empty block: give back the header reservation
2302 to->top -= BAMBOO_CACHE_LINE_SIZE;
2303 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2305 *heaptopptr = to->ptr;
2306 *filledblocks = to->numblocks;
2309 BAMBOO_DEBUGPRINT(0xe102);
2310 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2311 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2312 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2313 BAMBOO_DEBUGPRINT_REG(*filledblocks);
2314 BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2317 // send msgs to core coordinator indicating that the compact is finishing
2318 // send compact finish message to core coordinator
2319 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
// coordinator: record its own results directly
2320 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2321 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2322 if(orig->ptr < gcmarkedptrbound) {
2324 BAMBOO_DEBUGPRINT(0xe103);
// unmoved data remains: the coordinator asks for spare memory itself
2328 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2329 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2330 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2332 BAMBOO_DEBUGPRINT(0xe104);
2336 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2338 BAMBOO_DEBUGPRINT(0xe105);
2342 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2345 BAMBOO_DEBUGPRINT(0xe106);
2347 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2352 if(orig->ptr < gcmarkedptrbound) {
2354 BAMBOO_DEBUGPRINT(0xe107);
// worker with data left: report progress; non-zero gccurr_heaptop tells
// the coordinator more memory is needed
2358 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2359 *filledblocks, *heaptopptr, gccurr_heaptop, false);
2362 BAMBOO_DEBUGPRINT(0xe108);
2363 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2365 // finish compacting
2366 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2367 *filledblocks, *heaptopptr, 0, false);
2369 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2371 if(orig->ptr < gcmarkedptrbound) {
2373 BAMBOO_DEBUGPRINT(0xe109);
2375 // still have unpacked obj
2384 BAMBOO_DEBUGPRINT(0xe10a);
// a GCMOVESTART grant arrived: re-target `to` at the granted region
2387 to->ptr = gcmovestartaddr;
2388 to->numblocks = gcblock2fill - 1;
2389 to->bound = (to->numblocks==0) ?
2390 BAMBOO_SMEM_SIZE_L :
2391 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2392 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2393 to->offset = to->ptr - to->base;
2394 to->top = (to->numblocks==0) ?
2395 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2397 to->offset = BAMBOO_CACHE_LINE_SIZE;
2398 to->ptr += to->offset; // for header
2399 to->top += to->offset;
// the grant may live on another core's blocks
2400 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2401 *localcompact = true;
2403 *localcompact = false;
2405 #ifdef GC_CACHE_ADAPT
2406 // initialize the gc_cache_revise_information
2407 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2408 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2409 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2410 gc_cache_revise_infomation.to_page_index =
2411 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2412 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2413 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2414 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2415 gc_cache_revise_infomation.orig_page_index =
2416 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2417 #endif // GC_CACHE_ADAPT
2421 BAMBOO_DEBUGPRINT(0xe10b);
2424 } // void compacthelper()
// Per-core compaction entry point.  Verifies we are in the compact phase,
// allocates the two moveHelper cursors ('orig' = source region being
// evacuated, 'to' = destination region being filled), and hands off the
// actual object copying to compacthelper().
// NOTE(review): this chunk is elided (gaps in the embedded numbering), so
// the closing braces / early-return paths are not visible here.  It also
// looks like 'orig' and 'to' are RUNMALLOC'd and may not be freed in the
// visible lines — confirm against the full function body.
2426 inline void compact() {
// guard: compaction must only run during COMPACTPHASE
2427 if(COMPACTPHASE != gcphase) {
2428 BAMBOO_EXIT(0xb102);
2431 // initialize pointers for compacting
2432 struct moveHelper * orig =
2433 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2434 struct moveHelper * to =
2435 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
// initOrig_Dst() presumably positions both cursors; a false return means
// this core has nothing to compact — TODO confirm against its definition
2437 if(!initOrig_Dst(orig, to)) {
2438 // no available data to compact
2439 // send compact finish msg to STARTUP core
2441 BAMBOO_DEBUGPRINT(0xe001);
2442 BAMBOO_DEBUGPRINT_REG(to->base);
// report zero filled blocks and our base address to the coordinator
2444 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2445 0, to->base, 0, false);
2450 #ifdef GC_CACHE_ADAPT
// seed the per-page sampling window for the first source page; the
// *_end_va is rounded up to the next page boundary past orig->ptr
2451 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2452 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2453 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2454 gc_cache_revise_infomation.orig_page_index =
2455 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2456 #endif // GC_CACHE_ADAPT
// outputs of compacthelper(): how many blocks were filled, the new heap
// top, and whether the destination stayed on this core
2458 int filledblocks = 0;
2459 INTPTR heaptopptr = 0;
2460 bool localcompact = true;
2461 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2467 // if return NULL, means
2468 // 1. objptr is NULL
2469 // 2. objptr is not a shared obj
2470 // in these cases, keeping the original value is OK
// Translate a pre-compaction object pointer to its post-compaction address.
// Lookup order: (1) the local gcpointertbl; (2) the host core's shared
// mapping table; (3) an explicit GCMAPREQUEST message to the host core,
// then re-check the local table.  Returns NULL for non-shared pointers
// (caller keeps the original value).
// NOTE(review): chunk is elided — the waiting loop between the
// send_msg_3 and the final table re-check is not visible here.
2471 inline void * flushObj(void * objptr) {
2473 BAMBOO_DEBUGPRINT(0xe401);
2475 if(objptr == NULL) {
2478 void * dstptr = NULL;
2479 if(ISSHAREDOBJ(objptr)) {
2481 BAMBOO_DEBUGPRINT(0xe402);
2482 BAMBOO_DEBUGPRINT_REG(objptr);
2484 // a shared obj ptr, change to new address
// table accesses are done in runtime (interrupt-protected) mode
2485 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2487 //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
2489 #ifdef LOCALHASHTBL_TEST
2490 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2492 dstptr = mgchashSearch(gcpointertbl, objptr);
2494 //MGCHashget(gcpointertbl, objptr, &dstptr);
2496 //flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
2498 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2500 BAMBOO_DEBUGPRINT_REG(dstptr);
// local table miss: fall back to remote mapping information
2503 if(NULL == dstptr) {
2506 BAMBOO_DEBUGPRINT(0xe403);
2507 BAMBOO_DEBUGPRINT_REG(objptr);
2508 BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2510 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2511 // error! the obj is right on this core, but cannot find it
2512 //BAMBOO_DEBUGPRINT(0xecec);
2513 BAMBOO_DEBUGPRINT_REG(objptr);
2514 BAMBOO_EXIT(0xb103);
2515 // assume that the obj has not been moved, use the original address
2518 int hostc = hostcore(objptr);
2520 //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
2522 // check the corresponsing sharedptbl
2523 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2524 //struct GCSharedHash * sptbl = gcrpointertbls[hostcore(objptr)];
2525 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2527 //GCSharedHashget(sptbl, (int)objptr, &dstptr);
2528 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2529 if(dstptr != NULL) {
// cache the remote mapping locally to avoid repeated remote lookups
2530 #ifdef LOCALHASHTBL_TEST
2531 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2533 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2537 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2539 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
// shared table also missed: ask the host core directly for the mapping
2542 if(dstptr == NULL) {
2543 // still can not get the mapping info,
2544 // send msg to host core for the mapping info
2545 gcobj2map = (int)objptr;
2548 // the first time require the mapping, send msg to the hostcore
2549 // for the mapping info
2550 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2551 BAMBOO_NUM_OF_CORE, false);
2558 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
// re-check the local table; the reply handler presumably inserted the
// mapping while we waited — TODO confirm (wait loop is elided here)
2560 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2561 #ifdef LOCALHASHTBL_TEST
2562 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2564 dstptr = mgchashSearch(gcpointertbl, objptr);
2566 //MGCHashget(gcpointertbl, objptr, &dstptr);
2567 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2568 } // if(dstptr == NULL)
2569 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2571 BAMBOO_DEBUGPRINT_REG(dstptr);
2573 } // if(NULL == dstptr)
2574 } // if(ISSHAREDOBJ(objptr))
2575 // if not a shared obj, return NULL to indicate no need to flush
2577 BAMBOO_DEBUGPRINT(0xe404);
2580 } // void flushObj(void * objptr)
// Rewrite every runtime-held object pointer to its post-compaction address
// via flushObj(): the mutator stack roots, the per-class task parameter
// queues, the current task descriptor, active tasks, cached transferred
// objects (in and out queues), and lock redirect/value words.
// NOTE(review): chunk is elided — declarations of loop indices i/j and
// several NULL-guards around the flushObj() results are not visible;
// comments below describe only the visible lines.
2582 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2584 // flush current stack
2585 while(stackptr!=NULL) {
2586 for(i=0; i<stackptr->size; i++) {
2587 if(stackptr->array[i] != NULL) {
2588 void * dst = flushObj(stackptr->array[i]);
2590 stackptr->array[i] = dst;
2594 stackptr=stackptr->next;
// only worker cores keep task object queues
2598 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2599 for(i=0; i<NUMCLASSES; i++) {
2600 struct parameterwrapper ** queues =
2601 objectqueues[BAMBOO_NUM_OF_CORE][i];
2602 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2603 for(j = 0; j < length; ++j) {
2604 struct parameterwrapper * parameter = queues[j];
2605 struct ObjectHash * set=parameter->objectset;
2606 struct ObjectNode * ptr=set->listhead;
2608 void * dst = flushObj((void *)ptr->key);
// keys changed, so the hash set must be rebuilt
2614 ObjectHashrehash(set);
2619 // flush current task descriptor
2620 if(currtpd != NULL) {
2621 for(i=0; i<currtpd->numParameters; i++) {
2622 void * dst = flushObj(currtpd->parameterArray[i]);
2624 currtpd->parameterArray[i] = dst;
2629 // flush active tasks
2630 if(activetasks != NULL) {
2631 struct genpointerlist * ptr=activetasks->list;
2633 struct taskparamdescriptor *tpd=ptr->src;
2635 for(i=0; i<tpd->numParameters; i++) {
2636 void * dst = flushObj(tpd->parameterArray[i]);
2638 tpd->parameterArray[i] = dst;
2643 genrehash(activetasks);
2646 // flush cached transferred obj
2647 struct QueueItem * tmpobjptr = getHead(&objqueue);
2648 while(tmpobjptr != NULL) {
2649 struct transObjInfo * objInfo =
2650 (struct transObjInfo *)(tmpobjptr->objectptr);
2651 void * dst = flushObj(objInfo->objptr);
2653 objInfo->objptr = dst;
2655 tmpobjptr = getNextQueueItem(tmpobjptr);
2658 // flush cached objs to be transferred
2659 struct QueueItem * item = getHead(totransobjqueue);
2660 while(item != NULL) {
2661 struct transObjInfo * totransobj =
2662 (struct transObjInfo *)(item->objectptr);
2663 void * dst = flushObj(totransobj->objptr);
2665 totransobj->objptr = dst;
2667 item = getNextQueueItem(item);
2668 } // while(item != NULL)
2670 // enqueue lock related info
// redirectlock/value are stored as ints; the casts preserve the existing
// pointer-in-int convention used elsewhere in this runtime
2671 for(i = 0; i < runtime_locklen; ++i) {
2672 void * dst = flushObj(runtime_locks[i].redirectlock);
2674 runtime_locks[i].redirectlock = (int)dst;
2676 if(runtime_locks[i].value != NULL) {
2677 void * dst=flushObj(runtime_locks[i].value);
2679 runtime_locks[i].value = (int)dst;
2684 } // void flushRuntimeObj(struct garbagelist * stackptr)
// Broadcast this core's shared mapping table (gcsharedptbl) to every other
// active core, then tell the startup core this core's map phase is done.
// NOTE(review): the function's closing brace is elided from this view.
2686 inline void transmappinginfo() {
2687 // broadcast the sharedptbl pointer
2688 for(int i = 0; i < NUMCORESACTIVE; i++) {
2689 if(i != BAMBOO_NUM_OF_CORE) {
2690 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
// the startup core does not message itself; everyone else reports in
2694 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2695 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
// Flush phase driver: after compaction, rewrite every interior pointer of
// every moved object to its new address.  Processes (1) runtime roots via
// flushRuntimeObj(), (2) the regular marked-object queue, (3) the large
// object queue, then reports GCFINISHFLUSH to the coordinator.
// Object header layout relied on here: word[0] = type, word[6] = GC mark
// (COMPACTED -> reset to INIT once flushed).
// NOTE(review): chunk is elided — the while-loop header over the work
// queue and several error branches are not visible in this view.
2699 inline void flush(struct garbagelist * stackptr) {
2701 flushRuntimeObj(stackptr);
// queue operations must run in runtime (interrupt-protected) mode; the
// _I suffix marks functions that require interrupts already disabled
2704 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2705 bool hasItems = gc_moreItems_I();
2706 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2712 BAMBOO_DEBUGPRINT(0xe301);
2714 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2715 void * ptr = gc_dequeue_I();
2716 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2717 if(ISSHAREDOBJ(ptr)) {
2718 // should be a local shared obj and should have mapping info
2719 ptr = flushObj(ptr);
2721 BAMBOO_DEBUGPRINT(0xe302);
2722 BAMBOO_DEBUGPRINT_REG(ptr);
2723 BAMBOO_DEBUGPRINT_REG(tptr);
2724 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
// a shared object with no mapping at this point is a fatal GC bug
2727 BAMBOO_EXIT(0xb105);
2729 } // if(ISSHAREDOBJ(ptr))
// flush interior pointers of local objs and of shared objs that were
// actually moved (mark word == COMPACTED)
2730 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2731 int type = ((int *)(ptr))[0];
2732 // scan all pointers in ptr
2733 unsigned INTPTR * pointer;
// pointerarray[type]: 0 = no pointers, 1 = array of pointers, else a
// table whose [0] is the count and [1..n] are field offsets
2734 pointer=pointerarray[type];
2736 BAMBOO_DEBUGPRINT(0xe303);
2737 BAMBOO_DEBUGPRINT_REG(pointer);
2740 /* Array of primitives */
2742 } else if (((INTPTR)pointer)==1) {
2744 BAMBOO_DEBUGPRINT(0xe304);
2746 /* Array of pointers */
2747 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2748 int length=ao->___length___;
2750 for(j=0; j<length; j++) {
2752 BAMBOO_DEBUGPRINT(0xe305);
// element storage begins one int past the length field
2755 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2757 BAMBOO_DEBUGPRINT_REG(objptr);
2759 if(objptr != NULL) {
2760 void * dst = flushObj(objptr);
2762 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2768 BAMBOO_DEBUGPRINT(0xe306);
// offset-table case: walk each recorded pointer field
2770 INTPTR size=pointer[0];
2772 for(i=1; i<=size; i++) {
2774 BAMBOO_DEBUGPRINT(0xe307);
2776 unsigned int offset=pointer[i];
2777 void * objptr=*((void **)(((char *)ptr)+offset));
2779 BAMBOO_DEBUGPRINT_REG(objptr);
2781 if(objptr != NULL) {
2782 void * dst = flushObj(objptr);
2784 *((void **)(((char *)ptr)+offset)) = dst;
2787 } // for(i=1; i<=size; i++)
2788 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2789 // restore the mark field, indicating that this obj has been flushed
2790 if(ISSHAREDOBJ(ptr)) {
2791 ((int *)(ptr))[6] = INIT;
2793 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2794 } // while(gc_moreItems())
2796 BAMBOO_DEBUGPRINT(0xe308);
2799 // TODO bug here: the startup core contains all lobjs' info, thus all the
2800 // lobjs are flushed in sequence.
// large objects go through the same flush logic as above, but from the
// separate large-object queue
2802 while(gc_lobjmoreItems_I()) {
2804 BAMBOO_DEBUGPRINT(0xe309);
2806 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2807 ptr = flushObj(ptr);
2809 BAMBOO_DEBUGPRINT(0xe30a);
2810 BAMBOO_DEBUGPRINT_REG(ptr);
2811 BAMBOO_DEBUGPRINT_REG(tptr);
2812 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2815 BAMBOO_EXIT(0xb106);
2817 if(((int *)(ptr))[6] == COMPACTED) {
2818 int type = ((int *)(ptr))[0];
2819 // scan all pointers in ptr
2820 unsigned INTPTR * pointer;
2821 pointer=pointerarray[type];
2823 BAMBOO_DEBUGPRINT(0xe30b);
2824 BAMBOO_DEBUGPRINT_REG(pointer);
2827 /* Array of primitives */
2829 } else if (((INTPTR)pointer)==1) {
2831 BAMBOO_DEBUGPRINT(0xe30c);
2833 /* Array of pointers */
2834 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2835 int length=ao->___length___;
2837 for(j=0; j<length; j++) {
2839 BAMBOO_DEBUGPRINT(0xe30d);
2842 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2844 BAMBOO_DEBUGPRINT_REG(objptr);
2846 if(objptr != NULL) {
2847 void * dst = flushObj(objptr);
2849 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2855 BAMBOO_DEBUGPRINT(0xe30e);
2857 INTPTR size=pointer[0];
2859 for(i=1; i<=size; i++) {
2861 BAMBOO_DEBUGPRINT(0xe30f);
2863 unsigned int offset=pointer[i];
2864 void * objptr=*((void **)(((char *)ptr)+offset));
2867 BAMBOO_DEBUGPRINT_REG(objptr);
2869 if(objptr != NULL) {
2870 void * dst = flushObj(objptr);
2872 *((void **)(((char *)ptr)+offset)) = dst;
2875 } // for(i=1; i<=size; i++)
2876 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2877 // restore the mark field, indicating that this obj has been flushed
2878 ((int *)(ptr))[6] = INIT;
2879 } // if(((int *)(ptr))[6] == COMPACTED)
2880 } // while(gc_lobjmoreItems())
2882 BAMBOO_DEBUGPRINT(0xe310);
2885 // send flush finish message to core coordinator
2886 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2887 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2889 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2892 BAMBOO_DEBUGPRINT(0xe311);
2896 #ifdef GC_CACHE_ADAPT
2897 // prepare for cache adaption:
2898 // -- flush the shared heap
2899 // -- clean dtlb entries
2900 // -- change cache strategy
// Transition the core's cache configuration into (true) or out of (false)
// GC cache-sampling mode.  L2 flush + DTLB clean ensure no stale lines or
// translations survive the policy switch.
2901 void cacheAdapt_gc(bool isgccachestage) {
2902 // flush the shared heap
2903 BAMBOO_CACHE_FLUSH_L2();
2905 // clean the dtlb entries
2906 BAMBOO_CLEAN_DTLB();
2908 // change the cache strategy
2909 gccachestage = isgccachestage;
2910 } // cacheAdapt_gc(bool isgccachestage)
2912 // the master core decides how to adapt cache strategy for the mutator
2913 // according to collected statistic data
2915 // make all pages hfh
// Baseline policy: mark every shared-heap page hash-for-home (hashed across
// cores).  Writes (page_index, policy.word) pairs into gccachepolicytbl
// starting at slot 1 (slot 0 is reserved for the entry count).
// NOTE(review): declarations of page_sva and the tmp_p increments between
// writes are elided from this view; the return value is also not visible.
2916 int cacheAdapt_policy_h4h(){
2917 unsigned int page_index = 0;
2919 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2921 int * tmp_p = gccachepolicytbl+1;
2922 for(page_index = 0; page_index < page_num; page_index++) {
2923 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2924 bamboo_cache_policy_t policy = {0};
2925 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2926 *tmp_p = page_index;
2928 *tmp_p = policy.word;
2934 } // int cacheAdapt_policy_hfh()
2936 // make all pages local as non-cache-adaptable gc local mode
// Policy: home each page on the core that owns its heap block (the
// non-adaptive "GC local" layout), ignoring the sampling data.  Emits
// (page_index, policy.word) pairs into gccachepolicytbl.
// NOTE(review): page_sva/block declarations and tmp_p increments are
// elided from this view.
2937 int cacheAdapt_policy_local(){
2938 unsigned int page_index = 0;
2940 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2942 int * tmp_p = gccachepolicytbl+1;
2943 for(page_index = 0; page_index < page_num; page_index++) {
2944 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2945 bamboo_cache_policy_t policy = {0};
// map the page's virtual address back to its owning heap block/core
2947 BLOCKINDEX(page_sva, &block);
2948 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2949 // locally cache the page in the hotest core
2950 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2951 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
// +1 converts 0-based cpu coords to the 1-based tile coords the policy
// word expects (see NOTE above)
2952 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2953 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2954 *tmp_p = page_index;
2956 *tmp_p = policy.word;
2962 } // int cacheAdapt_policy_local()
// Policy: home each page on the single core that accessed it most during
// sampling ("hotest" core).  Pages with zero recorded accesses keep their
// current policy.  Emits (page_index, policy.word) pairs.
// NOTE(review): declarations of page_sva/hotestcore/hotfreq, the
// hotestcore assignment inside the frequency test, the zero-access skip
// branch, and tmp_p increments are all elided from this view.
2964 int cacheAdapt_policy_hotest(){
2965 unsigned int page_index = 0;
2967 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2969 int * tmp_p = gccachepolicytbl+1;
2970 for(page_index = 0; page_index < page_num; page_index++) {
2971 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2972 bamboo_cache_policy_t policy = {0};
// scan each core's per-page sample counters for this page
2976 for(int i = 0; i < NUMCORESACTIVE; i++) {
2977 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
2978 +size_cachesamplingtbl_local_r*i);
2979 int freq = local_tbl[page_index];
2981 // check the freqency, decide if this page is hot for the core
2982 if(hotfreq < freq) {
2988 // Decide the cache strategy for this page
2989 // If decide to adapt a new cache strategy, write into the shared block of
2990 // the gcsharedsamplingtbl. The mem recording information that has been
2991 // written is enough to hold the information.
2992 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
2994 // this page has not been accessed, do not change its cache policy
2997 // locally cache the page in the hotest core
2998 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2999 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3000 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3001 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3002 *tmp_p = page_index;
3004 *tmp_p = policy.word;
3011 } // int cacheAdapt_policy_hotest()
3013 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
3014 // cache the page on the core that accesses it the most if that core accesses
3015 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
// Policy: home a page on its hottest core only if that core contributed
// more than GC_CACHE_ADAPT_DOMINATE_THRESHOLD% of the page's accesses;
// otherwise fall back to hash-for-home.  Untouched pages keep their policy.
// NOTE(review): declarations of page_sva/hotestcore/hotfreq/totalfreq, the
// totalfreq accumulation, and tmp_p increments are elided from this view.
3017 int cacheAdapt_policy_dominate(){
3018 unsigned int page_index = 0;
3020 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3022 int * tmp_p = gccachepolicytbl+1;
3023 for(page_index = 0; page_index < page_num; page_index++) {
3024 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3025 bamboo_cache_policy_t policy = {0};
3030 for(int i = 0; i < NUMCORESACTIVE; i++) {
3031 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3032 +size_cachesamplingtbl_local_r*i);
3033 int freq = local_tbl[page_index];
3036 // check the freqency, decide if this page is hot for the core
3037 if(hotfreq < freq) {
3042 // Decide the cache strategy for this page
3043 // If decide to adapt a new cache strategy, write into the shared block of
3045 // Format: page start va + cache policy
3047 // this page has not been accessed, do not change its cache policy
// scale the total down to the dominance threshold before comparing
3050 totalfreq = (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100;
3051 if(hotfreq < totalfreq) {
// no single dominant core: hash the page across all cores
3053 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3055 // locally cache the page in the hotest core
3056 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3057 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3058 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3059 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3061 *tmp_p = page_index;
3063 *tmp_p = policy.word;
3069 } // int cacheAdapt_policy_dominate()
3071 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 20000
// Quicksort over an array of 3-int records (descending by the keyed
// field).  'offset' selects which of the three ints per record is the
// sort key: records are addressed as array[3*idx - offset], and all three
// ints of a record are swapped together.
// NOTE(review): the parameter list tail (left/right/offset), pivot/leftIdx
// declarations, and the leftIdx++/rightIdx-- increments are elided from
// this view.  The guard "(right-left+1) >= 1" looks like it was meant to
// be ">= 2" or "right > left" (as written it admits single-element and
// even empty ranges) — confirm against the full source.
3073 void gc_quicksort(int *array,
3079 int rightIdx = right;
3080 if((right-left+1) >= 1) {
3081 pivot = (left+right)/2;
3082 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
3083 int pivotValue = array[pivot*3-offset];
// advance from the left while records are already >= pivot (descending)
3084 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
3087 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
3090 // swap [leftIdx] & [rightIdx]
// swap whole 3-int records so key and payload move together
3091 for(int k = 0; k < 3; k++) {
3092 int tmp = array[3*rightIdx-k];
3093 array[3*rightIdx-k] = array[3*leftIdx-k];
3094 array[3*leftIdx-k] = tmp;
// if a cursor landed on the pivot, shift the pivot to keep partitioning
3098 if((leftIdx-1) == pivot) {
3099 pivot = rightIdx = rightIdx + 1;
3100 } else if((leftIdx+1) == pivot) {
3101 pivot = leftIdx = leftIdx-1;
3104 gc_quicksort(array, left, pivot-1, offset);
3105 gc_quicksort(array, pivot+1, right, offset);
3108 } // void gc_quicksort(...)
3110 // Every page cached on the core that accesses it the most.
3111 // Check to see if any core's pages total more accesses than threshold
3112 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3113 // most remote accesses and hash for home them until we get below
3114 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
// Policy: first home each page on its hottest core (like _hotest), while
// accumulating per-core workload and a per-core list of "heavy" pages
// (triples of {policy-slot ptr, totalfreq, remoteaccess}).  Then, for any
// core whose workload exceeds total_workload/10, sort its heavy pages by
// remote accesses and convert the worst offenders to hash-for-home until
// the core drops below the threshold.
// NOTE(review): heavily elided — declarations of page_sva / hotestcore /
// hotfreq / totalfreq / j, the totalfreq accumulation, the zero-access
// skip, tmp_p increments, and the j advance in the drain loop are not
// visible here.  core2heavypages[i][j] stores an int-cast pointer into
// gccachepolicytbl (see [3*index+1] = tmp_p-1), which truncates on LP64 —
// presumably fine on 32-bit TILE, but worth confirming.
3115 int cacheAdapt_policy_overload(){
3116 unsigned int page_index = 0;
3118 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3120 int * tmp_p = gccachepolicytbl+1;
3121 unsigned long long workload[NUMCORESACTIVE];
3122 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3123 unsigned long long total_workload = 0;
// [0] holds the record count; records are 3 ints each, 1-indexed
3124 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3125 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3126 for(page_index = 0; page_index < page_num; page_index++) {
3127 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3128 bamboo_cache_policy_t policy = {0};
3133 for(int i = 0; i < NUMCORESACTIVE; i++) {
3134 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3135 +size_cachesamplingtbl_local_r*i);
3136 int freq = local_tbl[page_index];
3139 // check the freqency, decide if this page is hot for the core
3140 if(hotfreq < freq) {
3145 /*if(page_sva == 0x10e90000) {
3146 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3150 // Decide the cache strategy for this page
3151 // If decide to adapt a new cache strategy, write into the shared block of
3152 // the gcsharedsamplingtbl. The mem recording information that has been
3153 // written is enough to hold the information.
3154 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3156 // this page has not been accessed, do not change its cache policy
3159 // locally cache the page in the hotest core
3160 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3161 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3162 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3163 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3164 *tmp_p = page_index;
3166 *tmp_p = policy.word;
3169 workload[hotestcore] += totalfreq;
3170 total_workload += totalfreq;
3171 // insert into core2heavypages using quicksort
3172 int remoteaccess = totalfreq - hotfreq;
3173 int index = core2heavypages[hotestcore][0];
// record layout per page: [+1]=policy-slot ptr, [+2]=totalfreq,
// [+3]=remoteaccess
3174 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3175 core2heavypages[hotestcore][3*index+2] = totalfreq;
3176 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3177 core2heavypages[hotestcore][0]++;
3179 /*if(page_sva == 0x10f10000) {
3181 BLOCKINDEX(page_sva, &block);
3182 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3183 int coord_x = bamboo_cpu2coords[2*coren]+1;
3184 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3185 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
// overload threshold: 10% of the global workload (the named constant
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD is currently bypassed, see comments)
3189 int workload_threshold = total_workload / 10;
3190 // Check the workload of each core
3191 for(int i = 0; i < NUMCORESACTIVE; i++) {
3193 int index = core2heavypages[i][0];
3194 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3195 // sort according to the remoteaccess
3196 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3197 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3198 // hfh those pages with more remote accesses
3199 bamboo_cache_policy_t policy = {0};
3200 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
// overwrite the policy word previously emitted for this page
3201 *((int*)core2heavypages[i][j]) = policy.word;
3202 workload[i] -= core2heavypages[i][j+1];
3209 } // int cacheAdapt_policy_overload()
3211 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3212 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
3213 // Every page cached on the core that accesses it the most.
3214 // Check to see if any core's pages total more accesses than threshold
3215 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3216 // most remote accesses and hash for home them until we get below
3217 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3218 // Sort pages based on activity....
3219 // If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
3220 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3221 // then start hfh these pages(selecting the ones with the most remote
3222 // accesses first or fewest local accesses) until we get below
3223 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
// Policy: same per-page "hottest core" assignment and per-core overload
// drain as cacheAdapt_policy_overload(), plus a "crowding" pass — if more
// than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of a core's remaining workload
// comes from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages, convert the
// busiest of those pages to hash-for-home as well.
// NOTE(review): heavily elided — local declarations (page_sva, hotestcore,
// hotfreq, totalfreq, j, t_workload), loop-counter advances, and the tail
// of the crowding loop are not visible; comments describe visible lines
// only.  As in _overload, core2heavypages stores int-cast pointers into
// gccachepolicytbl.
3224 int cacheAdapt_policy_crowd(){
3225 unsigned int page_index = 0;
3227 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3229 int * tmp_p = gccachepolicytbl+1;
3230 unsigned long long workload[NUMCORESACTIVE];
3231 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3232 unsigned long long total_workload = 0;
3233 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3234 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3235 for(page_index = 0; page_index < page_num; page_index++) {
3236 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3237 bamboo_cache_policy_t policy = {0};
3242 for(int i = 0; i < NUMCORESACTIVE; i++) {
3243 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3244 +size_cachesamplingtbl_local_r*i);
3245 int freq = local_tbl[page_index];
3248 // check the freqency, decide if this page is hot for the core
3249 if(hotfreq < freq) {
3254 /*if(page_sva == 0x10e90000) {
3255 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3259 // Decide the cache strategy for this page
3260 // If decide to adapt a new cache strategy, write into the shared block of
3261 // the gcsharedsamplingtbl. The mem recording information that has been
3262 // written is enough to hold the information.
3263 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3265 // this page has not been accessed, do not change its cache policy
3268 // locally cache the page in the hotest core
3269 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3270 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3271 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3272 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3273 *tmp_p = page_index;
3275 *tmp_p = policy.word;
3278 workload[hotestcore] += totalfreq;
3279 total_workload += totalfreq;
3280 // insert into core2heavypages using quicksort
3281 int remoteaccess = totalfreq - hotfreq;
3282 int index = core2heavypages[hotestcore][0];
3283 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3284 core2heavypages[hotestcore][3*index+2] = totalfreq;
3285 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3286 core2heavypages[hotestcore][0]++;
3288 /*if(page_sva == 0x10f10000) {
3290 BLOCKINDEX(page_sva, &block);
3291 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3292 int coord_x = bamboo_cpu2coords[2*coren]+1;
3293 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3294 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
3298 int workload_threshold = total_workload / 10;
3299 // Check the workload of each core
3300 for(int i = 0; i < NUMCORESACTIVE; i++) {
3302 int index = core2heavypages[i][0];
// pass 1: drain overloaded cores (identical to _overload policy)
3303 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3304 // sort according to the remoteaccess
3305 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3306 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3307 // hfh those pages with more remote accesses
3308 bamboo_cache_policy_t policy = {0};
3309 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3310 *((int*)core2heavypages[i][j]) = policy.word;
3311 workload[i] -= core2heavypages[i][j+1];
3316 // Check if the accesses are crowded on few pages
3317 // sort according to the total access
// pass 2: re-sort the remaining records by totalfreq (offset 1) and count
// how many pages it takes to reach the access threshold
3319 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3320 int threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3321 int num_crowded = 0;
3324 t_workload += core2heavypages[i][j+num_crowded*3+1];
3326 } while(t_workload < threshold);
3327 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
3328 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
3329 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
3331 // need to hfh these pages
3332 // sort the pages according to remote access
3333 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3334 //while((num_crowded--) && (j < index*3)) {
3335 // h4h those pages with more remote accesses
3336 bamboo_cache_policy_t policy = {0};
3337 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3338 *((int*)core2heavypages[i][j]) = policy.word;
3339 workload[i] -= core2heavypages[i][j+1];
3340 t_workload -= core2heavypages[i][j+1];
3341 /*if((j/3+GC_CACHE_ADAPT_CROWD_THRESHOLD) < index) {
3343 core2heavypages[i][j+GC_CACHE_ADAPT_CROWD_THRESHOLD*3+1];
3346 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3347 /*if(t_workload <= threshold) {
3351 if((j < index*3) && (t_workload > threshold)) {
3352 num_crowded = ((index-j/3) > GC_CACHE_ADAPT_CROWD_THRESHOLD) ?
3353 (GC_CACHE_ADAPT_CROWD_THRESHOLD) : (index-j/3);*/
3360 } // int cacheAdapt_policy_overload()
// Master-core decision step: run the selected policy function over the
// collected sampling data and publish the number of changed pages in
// gccachepolicytbl[0] (the policy entries themselves were written by the
// policy function starting at slot 1).  Alternative policies are kept
// around commented-out for experimentation.
// NOTE(review): the declaration of 'numchanged' is elided from this view.
3362 void cacheAdapt_master() {
3363 #ifdef GC_CACHE_ADAPT
3364 //gc_output_cache_sampling_r();
3365 #endif // GC_CACHE_ADAPT
3367 // check the statistic data
3368 // for each page, decide the new cache strategy
3369 //numchanged = cacheAdapt_policy_h4h();
3370 //numchanged = cacheAdapt_policy_local();
3371 numchanged = cacheAdapt_policy_hotest();
3372 //numchanged = cacheAdapt_policy_dominate();
3373 //numchanged = cacheAdapt_policy_overload();
3374 //numchanged = cacheAdapt_policy_crowd();
3375 *gccachepolicytbl = numchanged;
3377 //if(numchanged > 0) tprintf("=================\n");
3380 // adapt the cache strategy for the mutator
3380 // adapt the cache strategy for the mutator
// Each core applies the policy table published by cacheAdapt_master():
// entry 0 is the count, followed by (page_index, policy.word) pairs; each
// pair is handed to the hardware via bamboo_adapt_cache_policy().
// NOTE(review): the tmp_p advance between iterations is elided here.
3381 void cacheAdapt_mutator() {
3382 int numchanged = *gccachepolicytbl;
3383 // check the changes and adapt them
3384 int * tmp_p = gccachepolicytbl+1;
3385 while(numchanged--) {
3386 // read out the policy
3387 int page_index = *tmp_p;
3388 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3390 /*if(BAMBOO_NUM_OF_CORE == 0) {
3391 tprintf("va: %x, policy: %d (%d,%d) \n",
3392 (int)(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva), policy.cache_mode,
3393 policy.lotar_x, policy.lotar_y);
// apply the policy to the page's virtual address range
3396 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3397 policy, BAMBOO_PAGE_SIZE);
3401 //if(BAMBOO_NUM_OF_CORE == 0) tprintf("=================\n"); // TODO
// Debug dump of the raw (pre-GC) cache sampling table: for every shared
// page, print its address, index, owning core, then each active core's
// access count for that page.
// NOTE(review): declarations of page_sva/block are elided from this view.
3404 void gc_output_cache_sampling() {
3405 unsigned int page_index = 0;
3407 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3408 for(page_index = 0; page_index < page_num; page_index++) {
3409 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3411 BLOCKINDEX(page_sva, &block);
3412 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3413 tprintf("va: %x page_index: %d host: %d\n",
3414 (int)page_sva, page_index, coren);
3415 for(int i = 0; i < NUMCORESACTIVE; i++) {
3416 int * local_tbl = (int *)((void *)gccachesamplingtbl
3417 +size_cachesamplingtbl_local*i);
3418 int freq = local_tbl[page_index];
3419 printf("%8d ",freq);
3423 printf("=================\n");
3424 } // gc_output_cache_sampling
// Debug dump of the revised (post-GC, "_r") cache sampling table —
// identical in structure to gc_output_cache_sampling() but reads
// gccachesamplingtbl_r / size_cachesamplingtbl_local_r.
// NOTE(review): declarations of page_sva/block are elided from this view.
3426 void gc_output_cache_sampling_r() {
3427 unsigned int page_index = 0;
3429 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3430 for(page_index = 0; page_index < page_num; page_index++) {
3431 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3433 BLOCKINDEX(page_sva, &block);
3434 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3435 tprintf("va: %x page_index: %d host: %d\n",
3436 (int)page_sva, page_index, coren);
3437 for(int i = 0; i < NUMCORESACTIVE; i++) {
3438 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3439 +size_cachesamplingtbl_local_r*i);
3440 int freq = local_tbl[page_index];
3441 printf("%8d ",freq);
3445 printf("=================\n");
3446 } // gc_output_cache_sampling_r
3447 #endif // GC_CACHE_ADAPT
// GC-participating core's collection routine.  Reports readiness to the
// master, then walks the GC phase machine: INIT (incl. cache-adapt prep),
// MARK, compact, MAP, FLUSH (incl. profile reporting), PREFINISH (cache
// adaptation, only under GC_CACHE_ADAPT), FINISH.
// NOTE(review): heavily elided — the waits between phases, initGC() /
// compact() / transmappinginfo() / flush() call sites, and the matching
// #ifdef GC_PROFILE block are not all visible; comments describe visible
// lines only.
3449 inline void gc_collect(struct garbagelist * stackptr) {
3450 // inform the master that this core is at a gc safe point and is ready to
3452 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3453 self_numreceiveobjs, false);
3455 // core collector routine
3457 if(INITPHASE == gcphase) {
3461 #ifdef RAWPATH // TODO GC_DEBUG
3462 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3465 #ifdef GC_CACHE_ADAPT
3466 // prepare for cache adaption:
3467 cacheAdapt_gc(true);
3468 #endif // GC_CACHE_ADAPT
3469 //send init finish msg to core coordinator
3470 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3473 if(MARKPHASE == gcphase) {
3477 #ifdef RAWPATH // TODO GC_DEBUG
3478 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3479 udn_tile_coord_y());
// 'true' presumably marks this as a full/participating mark — TODO
// confirm against mark()'s definition
3481 mark(true, stackptr);
3482 #ifdef RAWPATH // TODO GC_DEBUG
3483 printf("(%x,%x) Finish mark phase, start compact phase\n",
3484 udn_tile_coord_x(), udn_tile_coord_y());
3487 #ifdef RAWPATH // TODO GC_DEBUG
3488 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3489 udn_tile_coord_y());
3493 if(MAPPHASE == gcphase) {
3497 #ifdef RAWPATH // TODO GC_DEBUG
3498 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3499 udn_tile_coord_y());
3502 #ifdef RAWPATH // TODO GC_DEBUG
3503 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3504 udn_tile_coord_y());
3508 if(FLUSHPHASE == gcphase) {
3512 #ifdef RAWPATH // TODO GC_DEBUG
3513 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3514 udn_tile_coord_y());
3517 // send the num of obj/liveobj/forwardobj to the startupcore
3518 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3519 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3520 gc_num_liveobj, gc_num_forwardobj, false);
// NOTE(review): "GC_PROFLIE" looks like a typo for GC_PROFILE in this
// #endif comment — the matching #ifdef is elided, confirm and fix there
3523 #endif // GC_PROFLIE
3525 #ifdef RAWPATH // TODO GC_DEBUG
3526 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3527 udn_tile_coord_y());
3530 #ifdef GC_CACHE_ADAPT
3532 if(PREFINISHPHASE == gcphase) {
3536 #ifdef RAWPATH // TODO GC_DEBUG
3537 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3538 udn_tile_coord_y());
3540 // cache adapt phase
// apply the master's published policy, then leave GC cache mode
3541 cacheAdapt_mutator();
3542 cacheAdapt_gc(false);
3543 //send init finish msg to core coordinator
3544 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3545 #ifdef RAWPATH // TODO GC_DEBUG
3546 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3547 udn_tile_coord_y());
3549 #endif // GC_CACHE_ADAPT
3552 if(FINISHPHASE == gcphase) {
3556 #ifdef RAWPATH // TODO GC_DEBUG
3557 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3559 } // void gc_collect(struct garbagelist * stackptr)
// gc_nocollect: per-core GC routine for cores that do NOT host collectable
// objects (non-gc cores). They still participate in the protocol: reach a
// safe point, run initGC, mark from their own roots, flush, and (with
// GC_CACHE_ADAPT) run the prefinish cache-adapt step -- but, unlike
// gc_collect, they skip the compact and map phases entirely.
// NOTE(review): this listing is elided -- phase-wait loops and closing
// braces between the visible statements are not shown here.
3561 inline void gc_nocollect(struct garbagelist * stackptr) {
3562 // inform the master that this core is at a gc safe point and is ready to
3564 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3565 self_numreceiveobjs, false);
3568 if(INITPHASE == gcphase) {
3572 #ifdef RAWPATH // TODO GC_DEBUG
3573 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3576 #ifdef GC_CACHE_ADAPT
3577 // prepare for cache adaption:
3578 cacheAdapt_gc(true);
3579 #endif // GC_CACHE_ADAPT
3580 //send init finish msg to core coordinator
3581 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3584 if(MARKPHASE == gcphase) {
3588 #ifdef RAWPATH // TODO GC_DEBUG
3589 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3590 udn_tile_coord_y());
3592 mark(true, stackptr);
3593 #ifdef RAWPATH // TODO GC_DEBUG
3594 printf("(%x,%x) Finish mark phase, wait for flush\n",
3595 udn_tile_coord_x(), udn_tile_coord_y());
3598 // non-gc core collector routine
3600 if(FLUSHPHASE == gcphase) {
3604 #ifdef RAWPATH // TODO GC_DEBUG
3605 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3606 udn_tile_coord_y());
3609 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3610 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3611 gc_num_liveobj, gc_num_forwardobj, false);
3614 #endif // GC_PROFILE
3616 #ifdef RAWPATH // TODO GC_DEBUG
3617 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3618 udn_tile_coord_y());
3621 #ifdef GC_CACHE_ADAPT
3623 if(PREFINISHPHASE == gcphase) {
3627 #ifdef RAWPATH // TODO GC_DEBUG
3628 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3629 udn_tile_coord_y());
3631 // cache adapt phase
3632 cacheAdapt_mutator();
3633 cacheAdapt_gc(false);
3634 //send init finish msg to core coordinator
3635 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3636 #ifdef RAWPATH // TODO GC_DEBUG
3637 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3638 udn_tile_coord_y());
3640 #endif // GC_CACHE_ADAPT
3643 if(FINISHPHASE == gcphase) {
3647 #ifdef RAWPATH // TODO GC_DEBUG
3648 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3650 } // void gc_nocollect(struct garbagelist * stackptr)
// gc_master: GC coordinator routine, run on the startup core. Drives the
// whole collection by setting gcphase, broadcasting phase-start messages
// (GCSTARTINIT / GCSTART / GCSTARTCOMPACT / GCSTARTMAPINFO / GCSTARTFLUSH /
// GCSTARTPREF / GCFINISH) and polling gccorestatus[] under runtime mode
// until every participating core reports done for the current phase.
// It also participates itself: it marks, load-balances and compacts its
// own share of the heap, and services pending move requests.
// NOTE(review): this listing is elided -- wait loops, #else branches and
// closing braces between the visible statements are not shown here.
3652 inline void gc_master(struct garbagelist * stackptr) {
3654 gcphase = INITPHASE;
3656 waitconfirm = false;
// --- INIT phase: every core, gc and non-gc alike, must (re)init GC state.
3660 // Note: all cores need to init gc including non-gc cores
3661 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3662 // send GC init messages to all cores
3663 send_msg_1(i, GCSTARTINIT, false);
3665 bool isfirst = true;
3666 bool allStall = false;
3668 #ifdef GC_CACHE_ADAPT
3669 // prepare for cache adaption:
3670 cacheAdapt_gc(true);
3671 #endif // GC_CACHE_ADAPT
3673 #ifdef RAWPATH // TODO GC_DEBUG
3674 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3675 udn_tile_coord_y());
3678 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
// Poll under runtime mode (interrupts masked) until all cores check in.
3680 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3681 if(gc_checkAllCoreStatus_I()) {
3682 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3685 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3690 #ifdef GC_CACHE_ADAPT
3691 //gc_output_cache_sampling();
3692 #endif // GC_CACHE_ADAPT
3693 #ifdef RAWPATH // TODO GC_DEBUG
3694 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3695 udn_tile_coord_y());
// --- MARK phase: broadcast GCSTART, then mark locally until the phase ends.
3697 // all cores have finished compacting
3698 // restore the gcstatus of all cores
3699 // Note: all cores have to do mark including non-gc cores
3700 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3701 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3702 gccorestatus[i] = 1;
3703 // send GC start messages to all cores
3704 send_msg_1(i, GCSTART, false);
3707 gcphase = MARKPHASE;
3709 while(MARKPHASE == gcphase) {
3710 mark(isfirst, stackptr);
3717 } // while(MARKPHASE == gcphase)
// --- Collect large-object info from the gc cores before load balancing.
3718 // send msgs to all cores requiring large objs info
3719 // Note: only need to ask gc cores, non-gc cores do not host any objs
3720 numconfirm = NUMCORES4GC - 1;
3721 for(i = 1; i < NUMCORES4GC; ++i) {
3722 send_msg_1(i, GCLOBJREQUEST, false);
3724 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3729 } // wait for responses
3730 // check the heaptop
3731 if(gcheaptop < gcmarkedptrbound) {
3732 gcheaptop = gcmarkedptrbound;
3737 #ifdef RAWPATH // TODO GC_DEBUG
3738 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3739 udn_tile_coord_y());
3742 // cache all large objs
3744 // no enough space to cache large objs
// Fatal: large objects cannot be staged; abort with a distinctive code.
3745 BAMBOO_EXIT(0xb107);
// --- COMPACT phase: compute per-core block budgets and start compaction.
3747 // predict number of blocks to fill for each core
3749 int numpbc = loadbalance(&tmpheaptop);
3751 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3752 #ifdef RAWPATH // TODO GC_DEBUG
3753 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3754 udn_tile_coord_y());
3757 //int tmptopptr = 0;
3758 //BASEPTR(gctopcore, 0, &tmptopptr);
3760 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3761 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3763 BAMBOO_DEBUGPRINT(0xabab);
3764 BAMBOO_DEBUGPRINT_REG(tmptopptr);
3766 for(i = 0; i < NUMCORES4GC; ++i) {
3768 BASEPTR(i, numpbc, &tmpcoreptr);
3769 //send start compact messages to all cores
3770 //TODO bug here, do not know if the direction is positive or negtive?
// Cores whose base falls below the heap top get one extra block to fill.
3771 if (tmpcoreptr < tmpheaptop /*tmptopptr*/) {
3772 gcstopblock[i] = numpbc + 1;
3773 if(i != STARTUPCORE) {
3774 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3776 gcblock2fill = numpbc+1;
3777 } // if(i != STARTUPCORE)
3779 gcstopblock[i] = numpbc;
3780 if(i != STARTUPCORE) {
3781 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3783 gcblock2fill = numpbc;
3784 } // if(i != STARTUPCORE)
3787 BAMBOO_DEBUGPRINT(0xf000+i);
3788 BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3789 BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3791 // init some data strutures for compact phase
3793 gcfilledblocks[i] = 0;
3794 gcrequiredmems[i] = 0;
3804 bool finalcompact = false;
3805 // initialize pointers for comapcting
3806 struct moveHelper * orig =
3807 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3808 struct moveHelper * to =
3809 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3810 initOrig_Dst(orig, to);
3811 int filledblocks = 0;
3812 INTPTR heaptopptr = 0;
3813 bool finishcompact = false;
3814 bool iscontinue = true;
3815 bool localcompact = true;
// Master compacts its own region while also arbitrating other cores'
// move requests until the whole compact phase is over.
3816 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3817 if((!finishcompact) && iscontinue) {
3819 BAMBOO_DEBUGPRINT(0xe001);
3820 BAMBOO_DEBUGPRINT_REG(numpbc);
3821 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3823 finishcompact = compacthelper(orig, to, &filledblocks,
3824 &heaptopptr, &localcompact);
3826 BAMBOO_DEBUGPRINT(0xe002);
3827 BAMBOO_DEBUGPRINT_REG(finishcompact);
3828 BAMBOO_DEBUGPRINT_REG(gctomove);
3829 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3830 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3831 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3835 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3836 if(gc_checkCoreStatus_I()) {
3837 // all cores have finished compacting
3838 // restore the gcstatus of all cores
3839 for(i = 0; i < NUMCORES4GC; ++i) {
3840 gccorestatus[i] = 1;
3842 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3845 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3846 // check if there are spare mem for pending move requires
3847 if(COMPACTPHASE == gcphase) {
3849 BAMBOO_DEBUGPRINT(0xe003);
3851 resolvePendingMoveRequest();
3853 BAMBOO_DEBUGPRINT_REG(gctomove);
3857 BAMBOO_DEBUGPRINT(0xe004);
3861 } // if(gc_checkCoreStatus_I()) else ...
3865 BAMBOO_DEBUGPRINT(0xe005);
3866 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3867 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3868 BAMBOO_DEBUGPRINT_REG(gctomove);
// A move destination was granted: rebuild the 'to' helper for the target
// block and account for the per-block header (one cache line).
3870 to->ptr = gcmovestartaddr;
3871 to->numblocks = gcblock2fill - 1;
3872 to->bound = (to->numblocks==0) ?
3873 BAMBOO_SMEM_SIZE_L :
3874 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3875 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3876 to->offset = to->ptr - to->base;
3877 to->top = (to->numblocks==0) ?
3878 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3880 to->offset = BAMBOO_CACHE_LINE_SIZE;
3881 to->ptr += to->offset; // for header
3882 to->top += to->offset;
3883 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3884 localcompact = true;
3886 localcompact = false;
3890 } else if(!finishcompact) {
3894 } // while(COMPACTPHASE == gcphase)
3898 #ifdef RAWPATH // TODO GC_DEBUG
3899 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3900 udn_tile_coord_y());
3905 #ifdef RAWPATH // TODO GC_DEBUG
3906 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3907 udn_tile_coord_y());
// --- MAP phase: gc cores exchange old->new address mapping info.
3915 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3916 // Note: all cores should flush their runtime data including non-gc
3918 for(i = 1; i < NUMCORES4GC; ++i) {
3919 // send start flush messages to all cores
3920 gccorestatus[i] = 1;
3921 send_msg_1(i, GCSTARTMAPINFO, false);
3926 #ifdef RAWPATH // TODO GC_DEBUG
3927 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3928 udn_tile_coord_y());
3932 #ifdef RAWPATH // TODO GC_DEBUG
3933 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3934 udn_tile_coord_y());
3936 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3937 while(MAPPHASE == gcphase) {
3938 // check the status of all cores
3939 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3940 if(gc_checkCoreStatus_I()) {
3941 // all cores have finished sending mapping info
3942 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3945 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3946 } // while(MAPPHASE == gcphase)
// --- FLUSH phase: every active core rewrites its pointers to new addrs.
3948 gcphase = FLUSHPHASE;
3949 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3950 // Note: all cores should flush their runtime data including non-gc
3952 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3953 // send start flush messages to all cores
3954 gccorestatus[i] = 1;
3955 send_msg_1(i, GCSTARTFLUSH, false);
3960 #ifdef RAWPATH // TODO GC_DEBUG
3961 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3962 udn_tile_coord_y());
3966 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3967 while(FLUSHPHASE == gcphase) {
3968 // check the status of all cores
3969 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3970 if(gc_checkAllCoreStatus_I()) {
3971 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3974 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3975 } // while(FLUSHPHASE == gcphase)
3976 #ifdef RAWPATH // TODO GC_DEBUG
3977 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3978 udn_tile_coord_y());
3981 #ifdef GC_CACHE_ADAPT
// --- PREFINISH phase (cache-adapt builds only): pick and apply the new
// cache policy before cores resume the mutator.
3982 // now the master core need to decide the new cache strategy
3983 cacheAdapt_master();
3985 gcphase = PREFINISHPHASE;
3986 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3987 // Note: all cores should flush their runtime data including non-gc
3989 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3990 // send start flush messages to all cores
3991 gccorestatus[i] = 1;
3992 send_msg_1(i, GCSTARTPREF, false);
3997 #ifdef RAWPATH // TODO GC_DEBUG
3998 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3999 udn_tile_coord_y());
4001 // cache adapt phase
4002 cacheAdapt_mutator();
4003 #ifdef GC_CACHE_ADAPT_OUTPUT
4004 bamboo_output_cache_policy();
4006 cacheAdapt_gc(false);
4007 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4008 while(PREFINISHPHASE == gcphase) {
4009 // check the status of all cores
4010 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4011 if(gc_checkAllCoreStatus_I()) {
4012 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4015 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4016 } // while(PREFINISHPHASE == gcphase)
4017 #endif // GC_CACHE_ADAPT
// --- FINISH: drop stale shared-memory pointers before releasing cores,
// since some may resume the mutator before others.
4019 gcphase = FINISHPHASE;
4021 // invalidate all shared mem pointers
4022 // put it here as it takes time to inform all the other cores to
4023 // finish gc and it might cause problem when some core resumes
4024 // mutator earlier than the other cores
4025 bamboo_cur_msp = NULL;
4026 bamboo_smem_size = 0;
4027 bamboo_smem_zero_top = NULL;
4029 gcprocessing = false;
4034 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4035 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
4036 // send gc finish messages to all cores
4037 send_msg_1(i, GCFINISH, false);
4038 gccorestatus[i] = 1;
4040 #ifdef RAWPATH // TODO GC_DEBUG
4041 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
4042 udn_tile_coord_y());
4045 } // void gc_master(struct garbagelist * stackptr)
// gc: GC entry point called when a core reaches a GC safe point.
// On core 0 it first verifies that every active core has stalled (re-sending
// GCSTARTPRE to stragglers) and that no object-transfer messages are still
// in flight (sum of sent minus received counters must be zero), then runs
// the coordinator (gc_master). Other gc cores run gc_collect; the remaining
// cores run gc_nocollect. All paths zero the first word of any leftover
// local shared-mem chunk before the init phase and reset the cache-sampling
// state afterwards (GC_CACHE_ADAPT builds).
// Returns bool; the visible lines do not show the return statements --
// NOTE(review): this listing is elided (wait loops, #else branches and
// closing braces between the visible statements are not shown here).
//
// FIX: the debug print of the receive counters used gcnumreceiveobjs[i]
// (a row pointer of the 2-D array, added to 0xf000) instead of the element
// gcnumreceiveobjs[0][i]; made it consistent with the writes above and with
// the parallel gcnumsendobjs[0][i] print.
4047 inline bool gc(struct garbagelist * stackptr) {
4050 gcprocessing = false;
4054 // core coordinator routine
4055 if(0 == BAMBOO_NUM_OF_CORE) {
4057 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
4058 udn_tile_coord_y());
4060 bool isallstall = true;
4061 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4062 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4064 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
4065 if(gccorestatus[ti] != 0) {
4071 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4072 // some of the cores are still executing the mutator and did not reach
4073 // some gc safe point, therefore it is not ready to do gc
4074 // in case that there are some pregc information msg lost, send a confirm
4075 // msg to the 'busy' core
4076 send_msg_1(ti, GCSTARTPRE, false);
4084 //BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
// Record this core's own message counters in row 0 of the 2-D counter
// arrays before summing across all active cores.
4085 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
4086 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
4089 BAMBOO_DEBUGPRINT(0xec04);
4091 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4092 sumsendobj += gcnumsendobjs[0][i];
4094 BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
4096 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4098 BAMBOO_DEBUGPRINT(0xec05);
4099 BAMBOO_DEBUGPRINT_REG(sumsendobj);
4101 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4102 sumsendobj -= gcnumreceiveobjs[0][i];
4104 BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
4106 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4108 BAMBOO_DEBUGPRINT(0xec06);
4109 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// Non-zero sum => object-transfer msgs still in flight; GC must not start.
4111 if(0 != sumsendobj) {
4112 // there were still some msgs on the fly, wait until there
4113 // are some update pregc information coming and check it again
4115 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4123 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4126 #ifdef RAWPATH // TODO GC_DEBUG
4127 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
4130 // Zero out the remaining bamboo_cur_msp
4131 // Only zero out the first 4 bytes of the remaining memory
4132 // Move the operation here because for the GC_CACHE_ADAPT version,
4133 // we need to make sure during the gcinit phase the shared heap is not
4134 // touched. Otherwise, there would be problem when adapt the cache
4136 if((bamboo_cur_msp != 0)
4137 && (bamboo_smem_zero_top == bamboo_cur_msp)
4138 && (bamboo_smem_size > 0)) {
4139 *((int *)bamboo_cur_msp) = 0;
4141 #ifdef GC_FLUSH_DTLB
4142 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4143 BAMBOO_CLEAN_DTLB();
4144 gc_num_flush_dtlb++;
4147 #ifdef GC_CACHE_ADAPT
4148 #ifdef GC_CACHE_SAMPLING
4149 // disable the timer interrupt
4150 bamboo_mask_timer_intr();
4151 // get the sampling data
4152 bamboo_output_dtlb_sampling();
4153 #endif // GC_CACHE_SAMPLING
4154 #endif // GC_CACHE_ADAPT
4155 gcprocessing = true;
4156 gc_master(stackptr);
4157 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
4158 // Zero out the remaining bamboo_cur_msp
4159 // Only zero out the first 4 bytes of the remaining memory
4160 // Move the operation here because for the GC_CACHE_ADAPT version,
4161 // we need to make sure during the gcinit phase the shared heap is not
4162 // touched. Otherwise, there would be problem when adapt the cache
4164 if((bamboo_cur_msp != 0)
4165 && (bamboo_smem_zero_top == bamboo_cur_msp)
4166 && (bamboo_smem_size > 0)) {
4167 *((int *)bamboo_cur_msp) = 0;
4169 #ifdef GC_FLUSH_DTLB
4170 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4171 BAMBOO_CLEAN_DTLB();
4172 gc_num_flush_dtlb++;
4175 #ifdef GC_CACHE_ADAPT
4176 #ifdef GC_CACHE_SAMPLING
4177 // disable the timer interrupt
4178 bamboo_mask_timer_intr();
4179 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4180 // get the sampling data
4181 bamboo_output_dtlb_sampling();
4183 #endif // GC_CACHE_SAMPLING
4184 #endif // GC_CACHE_ADAPT
4185 gcprocessing = true;
4186 gc_collect(stackptr);
4188 // invalidate all shared mem pointers
4189 bamboo_cur_msp = NULL;
4190 bamboo_smem_size = 0;
4191 bamboo_smem_zero_top = NULL;
4193 gcprocessing = false;
4195 // Zero out the remaining bamboo_cur_msp
4196 // Only zero out the first 4 bytes of the remaining memory
4197 // Move the operation here because for the GC_CACHE_ADAPT version,
4198 // we need to make sure during the gcinit phase the shared heap is not
4199 // touched. Otherwise, there would be problem when adapt the cache
4201 if((bamboo_cur_msp != 0)
4202 && (bamboo_smem_zero_top == bamboo_cur_msp)
4203 && (bamboo_smem_size > 0)) {
4204 *((int *)bamboo_cur_msp) = 0;
4206 #ifdef GC_FLUSH_DTLB
4207 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4208 BAMBOO_CLEAN_DTLB();
4209 gc_num_flush_dtlb++;
4212 #ifdef GC_CACHE_ADAPT
4213 #ifdef GC_CACHE_SAMPLING
4214 // disable the timer interrupt
4215 bamboo_mask_timer_intr();
4216 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4217 // get the sampling data
4218 bamboo_output_dtlb_sampling();
4220 #endif // GC_CACHE_SAMPLING
4221 #endif // GC_CACHE_ADAPT
4222 // not a gc core, should wait for gcfinish msg
4223 gcprocessing = true;
4224 gc_nocollect(stackptr);
4226 // invalidate all shared mem pointers
4227 bamboo_cur_msp = NULL;
4228 bamboo_smem_size = 0;
4229 bamboo_smem_zero_top = NULL;
4231 gcprocessing = false;
4233 #ifdef GC_CACHE_ADAPT
4234 #ifdef GC_CACHE_SAMPLING
4235 // reset the sampling arrays
4236 bamboo_dtlb_sampling_reset();
4237 #endif // GC_CACHE_SAMPLING
4238 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4239 // zero out the gccachesamplingtbl
4240 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4241 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4242 size_cachesamplingtbl_local_r);
4243 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
4244 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4247 #ifdef GC_CACHE_SAMPLING
4248 // enable the timer interrupt
4249 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4250 bamboo_unmask_timer_intr();
4251 #endif // GC_CACHE_SAMPLING
4252 #endif // GC_CACHE_ADAPT
4254 } // bool gc(struct garbagelist * stackptr)
// gc_profileStart: begin a new GC profiling record (GC_PROFILE builds).
// Allocates a GCInfo slot in gc_infoArray and stamps time[0] with the
// current execution time, unless the info array has already overflowed.
// NOTE(review): the closing braces of this function are elided from this
// listing.
4257 inline void gc_profileStart(void) {
4258 if(!gc_infoOverflow) {
4259 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4260 gc_infoArray[gc_infoIndex] = gcInfo;
4262 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
// gc_profileItem: append an intermediate timestamp to the current GC
// profiling record (a phase-boundary marker), unless the info array has
// overflowed. NOTE(review): closing braces elided from this listing.
4266 inline void gc_profileItem(void) {
4267 if(!gc_infoOverflow) {
4268 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4269 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
// gc_profileEnd: close the current GC profiling record. Appends a final
// timestamp followed by 7 statistics (livespace, freespace, lobj count,
// lobj space, obj count, live obj count, forwarded obj count) -- the same
// 7 trailing entries gc_outputProfileData reads back with index-7..index-1.
// Sets gc_infoOverflow once gc_infoIndex reaches GCINFOLENGTH.
// NOTE(review): some lines (e.g. the gc_infoIndex increment and closing
// braces) are elided from this listing.
4273 inline void gc_profileEnd(void) {
4274 if(!gc_infoOverflow) {
4275 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4276 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4277 gcInfo->time[gcInfo->index++] = gc_num_livespace;
4278 gcInfo->time[gcInfo->index++] = gc_num_freespace;
4279 gcInfo->time[gcInfo->index++] = gc_num_lobj;
4280 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4281 gcInfo->time[gcInfo->index++] = gc_num_obj;
4282 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4283 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4285 if(gc_infoIndex == GCINFOLENGTH) {
4286 gc_infoOverflow = true;
4287 //taskInfoIndex = 0;
4292 // output the profiling data
// gc_outputProfileData: dump all recorded GC profiling records.
// Two variants are visible here -- a printf-based one and a
// BAMBOO_DEBUGPRINT-based one that is further split by BAMBOO_MEMPROF --
// presumably selected by an #if/#else whose directives are elided from
// this listing (TODO confirm against the full file). Each record prints
// its per-phase timestamps (and deltas), the total duration, and the 7
// trailing statistics appended by gc_profileEnd; an overflow warning is
// emitted if gc_infoArray filled up.
4293 void gc_outputProfileData() {
4296 unsigned long long totalgc = 0;
4298 //printf("Start Time, End Time, Duration\n");
4299 // output task related info
4300 for(i = 0; i < gc_infoIndex; i++) {
4301 GCInfo * gcInfo = gc_infoArray[i];
4302 unsigned long long tmp = 0;
4303 for(j = 0; j < gcInfo->index; j++) {
4304 printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
4305 tmp = gcInfo->time[j];
4307 tmp = (tmp-gcInfo->time[0]);
4308 printf(" ++ %lld \n", tmp);
4312 if(gc_infoOverflow) {
4313 printf("Caution: gc info overflow!\n");
4316 printf("\n\n total gc time: %lld \n", totalgc);
4320 unsigned long long totalgc = 0;
4322 #ifndef BAMBOO_MEMPROF
4323 BAMBOO_DEBUGPRINT(0xdddd);
4325 // output task related info
4326 for(i= 0; i < gc_infoIndex; i++) {
4327 GCInfo * gcInfo = gc_infoArray[i];
4328 #ifdef BAMBOO_MEMPROF
// Memory-profiling builds: duration from first stamp to the stamp just
// before the 7 statistics entries (index-8), skipping the per-phase dump.
4329 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4331 unsigned long long tmp = 0;
4332 BAMBOO_DEBUGPRINT(0xddda);
// index-7 .. index-1 hold the 7 statistics; only the timestamps before
// them are phase times, hence the "- 7" bound.
4333 for(j = 0; j < gcInfo->index - 7; j++) {
4334 BAMBOO_DEBUGPRINT(gcInfo->time[j]);
4335 BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
4336 BAMBOO_DEBUGPRINT(0xdddb);
4337 tmp = gcInfo->time[j];
4339 tmp = (tmp-gcInfo->time[0]);
4340 BAMBOO_DEBUGPRINT_REG(tmp);
4341 BAMBOO_DEBUGPRINT(0xdddc);
4342 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]);
4343 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]);
4344 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]);
4345 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]);
4346 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]);
4347 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]);
4348 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]);
4349 BAMBOO_DEBUGPRINT(0xddde);
4353 #ifndef BAMBOO_MEMPROF
4354 BAMBOO_DEBUGPRINT(0xdddf);
4356 BAMBOO_DEBUGPRINT_REG(totalgc);
4358 if(gc_infoOverflow) {
4359 BAMBOO_DEBUGPRINT(0xefee);
4362 #ifndef BAMBOO_MEMPROF
4363 BAMBOO_DEBUGPRINT(0xeeee);
4367 #endif // #ifdef GC_PROFILE