#include "multicoregarbage.h"
#include "multicoreruntime.h"
#include "runtime_arch.h"
#include "SimpleHash.h"
#include "GenericHashtable.h"
#include "ObjectHash.h"
#include "GCSharedHash.h"
extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
extern int numqueues[][NUMCLASSES];

extern struct genhashtable * activetasks;
extern struct taskparamdescriptor *currtpd;

extern struct LockValue runtime_locks[MAXTASKPARAMS];
extern int runtime_locklen;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
struct pointerblock {
  void * ptrs[NUMPTRS];
  struct pointerblock *next;
};

struct pointerblock *gchead=NULL;
int gcheadindex=0;
struct pointerblock *gctail=NULL;
int gctailindex=0;
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
struct pointerblock *gcspare=NULL;
#define NUMLOBJPTRS 20

struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];
  int lengths[NUMLOBJPTRS];
  int hosts[NUMLOBJPTRS];
  struct lobjpointerblock *next;
  struct lobjpointerblock *prev;
};

struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;
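/*
 * Editor's note (not in the original source): the mark queue above is a
 * singly-linked list of fixed-size pointerblocks with one cached spare block
 * (gcspare) that avoids repeated RUNMALLOC/RUNFREE churn. The large object
 * queue is doubly linked (next/prev) so it can also be walked backwards (see
 * gc_lobjdequeue3_I below), and each entry records a (ptr, length, host)
 * triple instead of a bare pointer.
 */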
#ifdef GC_CACHE_ADAPT
typedef struct gc_cache_revise_info {
  int orig_page_start_va;
  int orig_page_end_va;
  int orig_page_index;
  int to_page_start_va;
  int to_page_end_va;
  int to_page_index;
  unsigned int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;
gc_cache_revise_info_t gc_cache_revise_infomation;
#endif // GC_CACHE_ADAPT
// dump the whole shared mem in blocks
inline void dumpSMem() {
  int block = 0;
  int sblock = 0;
  int j = 0;
  int i = 0;
  int coren = 0;
  int x = 0;
  int y = 0;
  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  // reserved blocks for sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  // remaining memory
  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
    advanceblock = false;
    // computing sblock # and block #, core coordinate (x,y) also
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      // finished a sblock
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          // finished a block
          block++;
          advanceblock = true;
        }
      } else {
        // finished a block
        block++;
        advanceblock = true;
      }
      // compute the host core #
      if(advanceblock) {
        coren = gc_block2core[block%(NUMCORES4GC*2)];
      }
      // compute core coordinate
      BAMBOO_COORDS(coren, &x, &y);
      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
             udn_tile_coord_x(), udn_tile_coord_y(),
             block, sblock++, x, y,
             (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    }
    j++;
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
} // void dumpSMem()
// should be invoked with interrupts disabled
inline void gc_enqueue_I(void *ptr) {
  GC_BAMBOO_DEBUGPRINT(0xe601);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  if (gcheadindex==NUMPTRS) {
    struct pointerblock * tmp;
    if (gcspare!=NULL) {
      tmp=gcspare;
      gcspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
    } // if (gcspare!=NULL)
    gchead->next=tmp;
    gchead=tmp;
    gcheadindex=0;
  } // if (gcheadindex==NUMPTRS)
  gchead->ptrs[gcheadindex++]=ptr;
  GC_BAMBOO_DEBUGPRINT(0xe602);
} // void gc_enqueue_I(void *ptr)
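/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * typical round trip on the mark queue. The _I suffix means interrupts must
 * already be disabled, which the runtime-mode transitions below provide;
 * "someobj" is a hypothetical pointer.
 */
#if 0
BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();   // disable interrupts
gc_enqueue_I(someobj);                     // push a discovered object
while(gc_moreItems_I()) {
  void * obj = gc_dequeue_I();             // pop in FIFO order
  /* ... trace obj ... */
}
BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();   // re-enable interrupts
#endif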
// dequeue and destroy the queue
inline void * gc_dequeue_I() {
  if (gctailindex==NUMPTRS) {
    struct pointerblock *tmp=gctail;
    gctail=gctail->next;
    gctailindex=0;
    if (gcspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gcspare=tmp;
    } // if (gcspare!=NULL)
  } // if (gctailindex==NUMPTRS)
  return gctail->ptrs[gctailindex++];
} // void * gc_dequeue_I()
// dequeue and do not destroy the queue
inline void * gc_dequeue2_I() {
  if (gctailindex2==NUMPTRS) {
    gctail2=gctail2->next;
    gctailindex2=0;
  } // if (gctailindex2==NUMPTRS)
  return gctail2->ptrs[gctailindex2++];
} // void * gc_dequeue2_I()
inline int gc_moreItems_I() {
  if ((gchead==gctail)&&(gctailindex==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems_I()

inline int gc_moreItems2_I() {
  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems2_I()
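/*
 * Editor's note: the queue keeps two read cursors. gc_dequeue_I /
 * gc_moreItems_I consume through (gctail, gctailindex) and recycle drained
 * blocks, while gc_dequeue2_I / gc_moreItems2_I read through the independent
 * cursor (gctail2, gctailindex2) and leave the blocks intact, so the same
 * contents can be scanned again later without re-enqueueing.
 */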
// should be invoked with interrupts disabled
// enqueue a large obj: start addr & length
inline void gc_lobjenqueue_I(void *ptr,
                             int length,
                             int host) {
  GC_BAMBOO_DEBUGPRINT(0xe901);
  if (gclobjheadindex==NUMLOBJPTRS) {
    struct lobjpointerblock * tmp;
    if (gclobjspare!=NULL) {
      tmp=gclobjspare;
      gclobjspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
    } // if (gclobjspare!=NULL)
    gclobjhead->next=tmp;
    tmp->prev = gclobjhead;
    gclobjhead=tmp;
    gclobjheadindex=0;
  } // if (gclobjheadindex==NUMLOBJPTRS)
  gclobjhead->lobjs[gclobjheadindex]=ptr;
  gclobjhead->lengths[gclobjheadindex]=length;
  gclobjhead->hosts[gclobjheadindex++]=host;
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
} // void gc_lobjenqueue_I(void *ptr...)
// dequeue and destroy the queue
inline void * gc_lobjdequeue_I(int * length,
                               int * host) {
  if (gclobjtailindex==NUMLOBJPTRS) {
    struct lobjpointerblock *tmp=gclobjtail;
    gclobjtail=gclobjtail->next;
    gclobjtailindex=0;
    gclobjtail->prev = NULL;
    if (gclobjspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gclobjspare=tmp;
      tmp->next = NULL;
      tmp->prev = NULL;
    } // if (gclobjspare!=NULL)
  } // if (gclobjtailindex==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail->lengths[gclobjtailindex];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
  }
  return gclobjtail->lobjs[gclobjtailindex++];
} // void * gc_lobjdequeue_I(int * length, int * host)
inline int gc_lobjmoreItems_I() {
  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems_I()
// dequeue and don't destroy the queue
inline void gc_lobjdequeue2_I() {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=1;
  } else {
    gclobjtailindex2++;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
} // void gc_lobjdequeue2_I()
inline int gc_lobjmoreItems2_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems2_I()
// 'reversely' dequeue and don't destroy the queue
inline void gc_lobjdequeue3_I() {
  if (gclobjtailindex2==0) {
    gclobjtail2=gclobjtail2->prev;
    gclobjtailindex2=NUMLOBJPTRS-1;
  } else {
    gclobjtailindex2--;
  } // if (gclobjtailindex2==0)
} // void gc_lobjdequeue3_I()
inline int gc_lobjmoreItems3_I() {
  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems3_I()
inline void gc_lobjqueueinit4_I() {
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
} // void gc_lobjqueueinit4_I()
inline void * gc_lobjdequeue4_I(int * length,
                                int * host) {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=0;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail2->lengths[gclobjtailindex2];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
  }
  return gclobjtail2->lobjs[gclobjtailindex2++];
} // void * gc_lobjdequeue4_I(int * length, int * host)

inline int gc_lobjmoreItems4_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems4_I()
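/*
 * Editor's note: the large object queue supports four traversals:
 *   1. destructive FIFO dequeue (gc_lobjdequeue_I), recycling blocks;
 *   2. forward scan via the second cursor (gc_lobjdequeue2_I), used by
 *      cacheLObjs() to size and insertion-sort the queue in place;
 *   3. backward scan (gc_lobjdequeue3_I), used to copy the highest-address
 *      objects to the heap top first;
 *   4. forward scan re-initialized by gc_lobjqueueinit4_I(), used by
 *      moveLObjs() to lay the objects back down in ascending order.
 */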
INTPTR gccurr_heapbound = 0;
inline void gettype_size(void * ptr,
                         int * ttype,
                         int * tsize) {
  int type = ((int *)ptr)[0];
  int size = 0;
  if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)ptr;
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  } // if(type < NUMCLASSES)
  *ttype = type;
  *tsize = size;
} // void gettype_size(void * ptr, int * ttype, int * tsize)
inline bool isLarge(void * ptr,
                    int * ttype,
                    int * tsize) {
  GC_BAMBOO_DEBUGPRINT(0xe701);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  // check if a pointer is referring to a large object
  gettype_size(ptr, ttype, tsize);
  GC_BAMBOO_DEBUGPRINT(*tsize);
  int bound = (BAMBOO_SMEM_SIZE);
  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  if((((int)ptr-gcbaseva)%(bound))==0) {
    // ptr is a start of a block
    GC_BAMBOO_DEBUGPRINT(0xe702);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
    // it crosses the boundary of the current block
    GC_BAMBOO_DEBUGPRINT(0xe703);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  GC_BAMBOO_DEBUGPRINT(0);
  return false;
} // bool isLarge(void * ptr, int * ttype, int * tsize)
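/*
 * Worked example (editor's sketch with hypothetical sizes): suppose a block
 * is 16KB and an object starts 15KB into it. Any object larger than the
 * remaining 1KB makes (bound - ((ptr-gcbaseva)%bound)) < *tsize true, so the
 * object would cross the block boundary and is classified as "large".
 * Objects that sit exactly on a block start are treated the same way, since
 * only large objects are placed at block boundaries by the allocator.
 */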
inline int hostcore(void * ptr) {
  // check the host core of ptr
  int host = 0;
  RESIDECORE(ptr, &host);
  GC_BAMBOO_DEBUGPRINT(0xedd0);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  GC_BAMBOO_DEBUGPRINT_REG(host);
  return host;
} // int hostcore(void * ptr)
inline void cpu2coords(int coren,
                       int * x,
                       int * y) {
  *x = bamboo_cpu2coords[2*coren];
  *y = bamboo_cpu2coords[2*coren+1];
} // void cpu2coords(...)

inline bool isLocal(void * ptr) {
  // check if a pointer points into this core's share of the shared heap
  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
} // bool isLocal(void * ptr)
inline bool gc_checkCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORES4GC; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORES4GC; ++i)
  return allStall;
} // bool gc_checkCoreStatus_I()

inline bool gc_checkAllCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORESACTIVE; ++i)
  return allStall;
} // bool gc_checkAllCoreStatus_I()
inline void checkMarkStatue() {
  GC_BAMBOO_DEBUGPRINT(0xee01);
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    GC_BAMBOO_DEBUGPRINT(0xee02);
    int entry_index = 0;
    if(waitconfirm) {
      // phase 2
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    GC_BAMBOO_DEBUGPRINT(0xee03);
    if(allStall) {
      GC_BAMBOO_DEBUGPRINT(0xee04);
      // ask for confirmation
      if(!waitconfirm) {
        GC_BAMBOO_DEBUGPRINT(0xee05);
        // the first time found all cores stall
        // send out status confirm msg to all other cores
        // reset the corestatus array too
        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        for(i = 1; i < NUMCORESACTIVE; ++i) {
          gccorestatus[i] = 1;
          // send mark phase finish confirm request msg to core i
          send_msg_1(i, GCMARKCONFIRM, false);
        } // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
        // check if the sums of sent objs and received objs are the same
        // yes->check if the info is the latest; no->go on executing
        int sumsendobj = 0;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee06);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee07);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        if(0 == sumsendobj) {
          // Check if there are changes of the numsendobjs or numreceiveobjs
          // on each core
          bool ischanged = false;
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
              ischanged = true;
              break;
            }
          } // for(i = 0; i < NUMCORESACTIVE; ++i)
          GC_BAMBOO_DEBUGPRINT(0xee08);
          GC_BAMBOO_DEBUGPRINT_REG(ischanged);
          if(!ischanged) {
            GC_BAMBOO_DEBUGPRINT(0xee09);
            // all the core status info are the latest
            // stop the mark phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(i = 0; i < NUMCORESACTIVE; ++i) {
              gccorestatus[i] = 1;
            } // for(i = 0; i < NUMCORESACTIVE; ++i)
          } else {
            waitconfirm = false;
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          }
        } else {
          // There were changes between phase 1 and phase 2, can not decide
          // whether the mark phase has been finished
          waitconfirm = false;
          // As it fails in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } // if(0 == sumsendobj) else ...
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!gcwaitconfirm) else()
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    }
  } // if((!waitconfirm)...
  GC_BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
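/*
 * Editor's note on the routine above: a single all-cores-stalled snapshot is
 * not enough to end the mark phase, because a mark message may still be in
 * flight. The coordinator therefore samples the per-core send/receive
 * counters twice (the two rows of gcnumsendobjs/gcnumreceiveobjs, selected by
 * gcnumsrobjs_index). Only when sum(sent) == sum(received) AND both samples
 * agree per core can no message be in flight, so marking is declared done and
 * gcphase advances to COMPACTPHASE.
 */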
inline void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcloads[i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
      gcstopblock[i] = 0;
    } // for(i = 0; i < NUMCORES4GC; ++i)
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcmovestartaddr = 0;
  gctomove = false;
  gcblock2fill = 0;
  gcmovepending = 0;
  gccurr_heaptop = 0;
  gcdstcore = 0;

  // initialize the mark queue
  if (gchead==NULL) {
    gcheadindex=gctailindex=gctailindex2 = 0;
    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if (gclobjhead==NULL) {
    gclobjheadindex = 0;
    gclobjtailindex = 0;
    gclobjtailindex2 = 0;
    gclobjhead=gclobjtail=gclobjtail2=
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

#ifdef LOCALHASHTBL_TEST
  freeRuntimeHash(gcpointertbl);
  gcpointertbl = allocateRuntimeHash(20);
#else
  mgchashreset(gcpointertbl);
#endif

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);

  // initialize the mapping info related structures
  if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
    // Never free the shared hash table, just reset it
    mgcsharedhashReset(gcsharedptbl);
  }
#ifdef GC_PROFILE
  gc_num_livespace = 0;
  gc_num_freespace = 0;
  gc_num_lobj = 0;
  gc_num_lobjspace = 0;
  gc_num_liveobj = 0;
  gc_num_forwardobj = 0;
  gc_num_profiles = NUMCORESACTIVE - 1;
#endif
} // void initGC()
// compute load balance for all cores
inline int loadbalance(int * heaptop) {
  // compute load balance
  int i;

  // get the total loads
  int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  GC_BAMBOO_DEBUGPRINT(0xdddd);
  GC_BAMBOO_DEBUGPRINT_REG(tloads);
  GC_BAMBOO_DEBUGPRINT_REG(*heaptop);
  int b = 0;
  BLOCKINDEX(*heaptop, &b);
  int numbpc = b / NUMCORES4GC; // num of blocks per core
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(numbpc);
  gctopblock = b;
  RESIDECORE(*heaptop, &gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  return numbpc;
} // int loadbalance(int * heaptop)
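/*
 * Worked example (editor's sketch, hypothetical numbers): with 4 GC cores and
 * total live loads equal to 10 blocks' worth of data, *heaptop lands 10
 * blocks above gcbaseva, BLOCKINDEX maps it to block b = 10, and
 * numbpc = 10/4 = 2 blocks per core becomes the target share each core
 * should hold after compaction; gctopcore is the core whose region contains
 * the resulting heap top.
 */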
inline bool cacheLObjs() {
  // check the total mem size needed for large objs
  unsigned long long sumsize = 0;
  int size = 0;
  GC_BAMBOO_DEBUGPRINT(0xe801);
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  int tmp_lobj = 0;
  int tmp_len = 0;
  int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  // TODO USE QUICK SORT INSTEAD?
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
#ifdef GC_PROFILE
    gc_num_lobj++;
#endif
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    GC_BAMBOO_DEBUGPRINT_REG(tmp_len);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        } // if(tmp_block->lobjs[i-1] < tmp_lobj)
      } // if(i ==0 ) else {}
    } // while(true)
    // insert it
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  } // while(gc_lobjmoreItems2())

#ifdef GC_PROFILE
  gc_num_lobjspace = sumsize;
#endif
  // check if there is enough space to cache these large objs
  INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    GC_BAMBOO_DEBUGPRINT(0xe802);
    GC_BAMBOO_DEBUGPRINT_REG(dst);
    GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  GC_BAMBOO_DEBUGPRINT(0xe803);
  GC_BAMBOO_DEBUGPRINT_REG(dst);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the largeObjs to the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
    dst -= size;
    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    GC_BAMBOO_DEBUGPRINT(0x804);
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    GC_BAMBOO_DEBUGPRINT(dst);
    GC_BAMBOO_DEBUGPRINT_REG(size);
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  }
  return true;
} // bool cacheLObjs()
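/*
 * Editor's note: cacheLObjs() parks every large object in one contiguous run
 * at the very top of the shared heap so the compact phase can slide small
 * objects freely; moveLObjs() later lays them back down just above the
 * compacted data. Keeping the queue insertion-sorted by ascending start
 * address means the backward scan above copies the highest-address objects
 * first, so source and destination can only overlap in one direction, which
 * the memmove() fallback handles.
 */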
// update the bamboo_smemtbl to record current shared mem usage
void updateSmemTbl(int coren,
                   int localtop) {
  int ltopcore = 0;
  int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
    bound = BAMBOO_SMEM_SIZE;
  }
  int load = (localtop-gcbaseva)%bound;
  int i = 0;
  int j = 0;
  int toset = 0;
  do {
    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
    if(toset < ltopcore) {
      bamboo_smemtbl[toset]=
        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
    } else if(toset == ltopcore) {
      bamboo_smemtbl[toset] = load;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
      break;
    } else {
      break;
    }
    i++;
    if(i == 2) {
      i = 0;
      j++;
    }
  } while(true);
} // void updateSmemTbl(int, int)
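/*
 * Worked example (editor's sketch): gc_core2block[2*coren+i] gives the two
 * blocks assigned to core coren inside one stripe of NUMCORES4GC*2 blocks,
 * and adding (NUMCORES4GC*2)*j steps stripe by stripe. With 4 GC cores,
 * core 1 visits blocks gc_core2block[2], gc_core2block[3],
 * gc_core2block[2]+8, gc_core2block[3]+8, ... marking each fully used until
 * it reaches the block holding localtop, which gets the partial load.
 */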
inline void moveLObjs() {
  GC_BAMBOO_DEBUGPRINT(0xea01);
#ifdef SMEMM
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
#endif
  // zero out the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // find current heap top
  // flush all gcloads to indicate the real heap top on one core
  // previously it represented the next available ptr on a core
  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1 ?
                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  GC_BAMBOO_DEBUGPRINT(0xea02);
  GC_BAMBOO_DEBUGPRINT_REG(gcloads[0]);
  GC_BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+i);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
    if((gcfilledblocks[i] > 0)
       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
  } // for(int i = 1; i < NUMCORES4GC; i++) {
  // find current heap top
  // TODO
  // a bug here: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory efficient
  int tmpheaptop = 0;
  int size = 0;
  int bound = 0;
  int i = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
        (BAMBOO_SMEM_SIZE_L*i) :
        (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }

  // move large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
#ifdef SMEMM
  gcmem_mixed_usedmem += tomove;
#endif
  GC_BAMBOO_DEBUGPRINT(0xea03);
  GC_BAMBOO_DEBUGPRINT_REG(tomove);
  GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
  // flush the gcsbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks it crosses
    int remain = tmpheaptop-gcbaseva;
    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; //number of the sblock
    int b = 0;  // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // check the remaining space in this block
    bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;

    GC_BAMBOO_DEBUGPRINT(0xea04);
    int isize = 0;
    int host = 0;
    int ptr = 0;
    int base = tmpheaptop;
    int cpysize = 0;
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain < isize) {
        // this object crosses blocks
        if(cpysize > 0) {
          // close current block, fill its header
          BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
          *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
          bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; //add the size of header
          cpysize = 0;
          base = tmpheaptop;
          if(remain == 0) {
            remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                     BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          }
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
          BLOCKINDEX(tmpheaptop, &b);
          sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
        } // if(cpysize > 0)

        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea05);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        GC_BAMBOO_DEBUGPRINT_REG(base);
        gcheaptop += size;
        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xcdca);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          GC_BAMBOO_DEBUGPRINT(0xcdcb);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;

        // set the gcsbstarttbl and bamboo_smemtbl
        int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          // exactly on a block boundary
          gcsbstarttbl[sb] = (INTPTR)(-1);
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
        // close current block and fill the header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
        cpysize = 0;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      } else {
        remain -= isize;
        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea06);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        gcheaptop += size;
        cpysize += isize;
        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xcdcc);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          GC_BAMBOO_DEBUGPRINT(0xcdcd);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;

        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } // if(remain < isize) else ...
    } // while(gc_lobjmoreItems())
    if(cpysize > 0) {
      // close current block, fill the header
      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
    } else {
      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
    }
    gcheaptop = tmpheaptop;
  } // if(tomove == 0) else ...

  GC_BAMBOO_DEBUGPRINT(0xea07);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  bamboo_free_block = 0;
  int tbound = 0;
  do {
    tbound = (bamboo_free_block<NUMCORES4GC) ?
             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  } while(true);

#ifdef GC_PROFILE
  // check how much live space there is
  gc_num_livespace = 0;
  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
    gc_num_livespace += bamboo_smemtbl[tmpi];
  }
  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
#endif // GC_PROFILE
  GC_BAMBOO_DEBUGPRINT(0xea08);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
} // void moveLObjs()
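/*
 * Editor's note: after moveLObjs() the shared heap reads, bottom to top:
 * compacted small objects per core, then the relocated large objects (each
 * run prefixed by a BAMBOO_CACHE_LINE_SIZE header holding the used size),
 * then free space. gcheaptop marks the end of live data and
 * bamboo_free_block the first partially free block for the next allocation
 * round.
 */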
inline void markObj(void * objptr) {
  if(objptr == NULL) {
    return;
  }
  if(ISSHAREDOBJ(objptr)) {
    int host = hostcore(objptr);
    if(BAMBOO_NUM_OF_CORE == host) {
      // on this core
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(((int *)objptr)[6] == INIT) {
        // this is the first time that this object is discovered,
        // set the flag as DISCOVERED
        ((int *)objptr)[6] |= DISCOVERED;
        BAMBOO_CACHE_FLUSH_LINE(objptr);
        gc_enqueue_I(objptr);
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      GC_BAMBOO_DEBUGPRINT(0xbbbb);
      GC_BAMBOO_DEBUGPRINT_REG(host);
      GC_BAMBOO_DEBUGPRINT_REG(objptr);
      // check if this obj has been forwarded
      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
        // send a msg to host informing that objptr is active
        send_msg_2(host, GCMARKEDOBJ, objptr, false);
#ifdef GC_PROFILE
        gc_num_forwardobj++;
#endif // GC_PROFILE
        gcself_numsendobjs++;
        MGCHashadd(gcforwardobjtbl, (int)objptr);
      }
    }
  } else {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gc_enqueue_I(objptr);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // if(ISSHAREDOBJ(objptr))
} // void markObj(void * objptr)
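/*
 * Editor's sketch of the header-word protocol used above (word [6] of every
 * object): INIT -> DISCOVERED when first reached (object is queued),
 * DISCOVERED -> MARKED when traced in mark(), MARKED -> COMPACTED once the
 * object has been copied. Remote references never touch the header directly;
 * they send a GCMARKEDOBJ message to the host core, deduplicated through
 * gcforwardobjtbl.
 */
#if 0
// Hypothetical helper showing the same local test markObj performs:
static inline bool needsEnqueue(void * objptr) {
  return ((int *)objptr)[6] == INIT;   // neither discovered nor marked yet
}
#endif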
// enqueue root objs
inline void tomark(struct garbagelist * stackptr) {
  if(MARKPHASE != gcphase) {
    GC_BAMBOO_DEBUGPRINT_REG(gcphase);
    BAMBOO_EXIT(0xb001);
  }
  gcbusystatus = true;

  int i,j;
  // enqueue current stack
  while(stackptr!=NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe501);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        markObj(stackptr->array[i]);
      }
    }
    stackptr=stackptr->next;
  }

  GC_BAMBOO_DEBUGPRINT(0xe503);
  // enqueue objectsets
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          markObj((void *)ptr->key);
          ptr=ptr->lnext;
        }
      }
    }
  }
  // enqueue current task descriptor
  if(currtpd != NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe504);
    for(i=0; i<currtpd->numParameters; i++) {
      markObj(currtpd->parameterArray[i]);
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe505);
  // enqueue active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      for(i=0; i<tpd->numParameters; i++) {
        markObj(tpd->parameterArray[i]);
      }
      ptr=ptr->inext;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe506);
  // enqueue cached transferred objs
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    markObj(objInfo->objptr);
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  GC_BAMBOO_DEBUGPRINT(0xe507);
  // enqueue cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    markObj(totransobj->objptr);
    item = getNextQueueItem(item);
  } // while(item != NULL)

  GC_BAMBOO_DEBUGPRINT(0xe508);
  // enqueue lock related info
  for(i = 0; i < runtime_locklen; ++i) {
    markObj((void *)(runtime_locks[i].redirectlock));
    if(runtime_locks[i].value != NULL) {
      markObj((void *)(runtime_locks[i].value));
    }
  }
} // void tomark(struct garbagelist * stackptr)
inline void mark(bool isfirst,
                 struct garbagelist * stackptr) {
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed01);
  if(isfirst) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed02);
    // enqueue root objs
    tomark(stackptr);
    gccurr_heaptop = 0; // record the size of all active objs in this core
                        // aligned but does not consider block boundaries
    gcmarkedptrbound = 0;
  }
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed03);

  bool checkfield = true;
  bool sendStall = false;
  // mark phase
  while(MARKPHASE == gcphase) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed04);
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      bool hasItems = gc_moreItems2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xed05);
      if(!hasItems) {
        break;
      }
      sendStall = false;
      gcbusystatus = true;
      checkfield = true;
      void * ptr = gc_dequeue2_I();

      GC_BAMBOO_DEBUGPRINT_REG(ptr);
      int size = 0;
      int isize = 0;
      int type = 0;
      // check if it is a shared obj
      if(ISSHAREDOBJ(ptr)) {
        // a shared obj, check if it is a local obj on this core
        int host = hostcore(ptr);
        bool islocal = (host == BAMBOO_NUM_OF_CORE);
        if(islocal) {
          bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
          if(isLarge(ptr, &type, &size) && isnotmarked) {
            // ptr is a large object and not marked or enqueued
            GC_BAMBOO_DEBUGPRINT(0xecec);
            GC_BAMBOO_DEBUGPRINT_REG(ptr);
            GC_BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
            BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
            gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
            BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
            // mark this obj
            ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);
          } else if(isnotmarked) {
            // ptr is an unmarked active object on this core
            ALIGNSIZE(size, &isize);
            gccurr_heaptop += isize;
            GC_BAMBOO_DEBUGPRINT(0xaaaa);
            GC_BAMBOO_DEBUGPRINT_REG(ptr);
            GC_BAMBOO_DEBUGPRINT_REG(isize);
            GC_BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
            // mark this obj
            ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);

            if(ptr + size > gcmarkedptrbound) {
              gcmarkedptrbound = ptr + size;
            } // if(ptr + size > gcmarkedptrbound)
          } else {
            // ptr is not an active obj or has been marked
            checkfield = false;
          } // if(isLarge(ptr, &type, &size)) else ...
        } /* can never reach here
        else {
          // check if this obj has been forwarded
          if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
            // send a msg to host informing that ptr is active
            send_msg_2(host, GCMARKEDOBJ, ptr, false);
            gcself_numsendobjs++;
            MGCHashadd(gcforwardobjtbl, (int)ptr);
          }
          checkfield = false;
        }// if(isLocal(ptr)) else ...*/
      } // if(ISSHAREDOBJ(ptr))
      GC_BAMBOO_DEBUGPRINT(0xed06);

      if(checkfield) {
        // scan all pointers in ptr
        unsigned INTPTR * pointer;
        pointer=pointerarray[type];
        if (pointer==0) {
          /* Array of primitives */
          /* Do nothing */
        } else if (((INTPTR)pointer)==1) {
          /* Array of pointers */
          struct ArrayObject *ao=(struct ArrayObject *) ptr;
          int length=ao->___length___;
          int j;
          for(j=0; j<length; j++) {
            void * objptr =
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
            markObj(objptr);
          }
        } else {
          INTPTR size=pointer[0];
          int i;
          for(i=1; i<=size; i++) {
            unsigned int offset=pointer[i];
            void * objptr=*((void **)(((char *)ptr)+offset));
            markObj(objptr);
          }
        } // if (pointer==0) else if ... else ...
      } // if(checkfield)
    } // while(gc_moreItems2())
    GC_BAMBOO_DEBUGPRINT(0xed07);
    gcbusystatus = false;
    // send mark finish msg to core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
      GC_BAMBOO_DEBUGPRINT(0xed08);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
      gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
      gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
        gcself_numreceiveobjs;
      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
    } else {
      if(!sendStall) {
        GC_BAMBOO_DEBUGPRINT(0xed09);
        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
                   gcself_numsendobjs, gcself_numreceiveobjs, false);
        sendStall = true;
      }
    } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
    GC_BAMBOO_DEBUGPRINT(0xed0a);

    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
      GC_BAMBOO_DEBUGPRINT(0xed0b);
      return;
    }
  } // while(MARKPHASE == gcphase)
} // void mark(bool isfirst, struct garbagelist * stackptr)
inline void compact2Heaptophelper_I(int coren,
                                    int* p,
                                    int* numblocks,
                                    int* remain) {
  int b;
  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
  if(STARTUPCORE == coren) {
    gctomove = true;
    gcmovestartaddr = *p;
    gcdstcore = gctopcore;
    gcblock2fill = *numblocks + 1;
  } else {
    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
  }
  GC_BAMBOO_DEBUGPRINT_REG(coren);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(*p);
  GC_BAMBOO_DEBUGPRINT_REG(*numblocks+1);
  if(memneed < *remain) {
    GC_BAMBOO_DEBUGPRINT(0xd104);
    *p = *p + memneed;
    gcrequiredmems[coren] = 0;
    gcloads[gctopcore] += memneed;
    *remain = *remain - memneed;
  } else {
    GC_BAMBOO_DEBUGPRINT(0xd105);
    // next available block
    *p += *remain;
    gcfilledblocks[gctopcore] += 1;
    int newbase = 0;
    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
    gcloads[gctopcore] = newbase;
    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
    gcstopblock[gctopcore]++;
    gctopcore = NEXTTOPCORE(gctopblock);
    gctopblock++;
    *numblocks = gcstopblock[gctopcore];
    *p = gcloads[gctopcore];
    BLOCKINDEX(*p, &b);
    *remain=(b<NUMCORES4GC) ?
             ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
      : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
    GC_BAMBOO_DEBUGPRINT(0xd106);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    GC_BAMBOO_DEBUGPRINT_REG(*p);
    GC_BAMBOO_DEBUGPRINT_REG(b);
    GC_BAMBOO_DEBUGPRINT_REG(*remain);
  } // if(memneed < remain)
  gcmovepending--;
} // void compact2Heaptophelper_I(int, int*, int*, int*)
inline void compact2Heaptop() {
  // no cores with spare mem and some cores are blocked with pending move
  // find the current heap top and make them move to the heap top
  int p;
  int numblocks = gcfilledblocks[gctopcore];
  p = gcloads[gctopcore];
  int b;
  BLOCKINDEX(p, &b);
  int remain = (b<NUMCORES4GC) ?
               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
               : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
  // check if the top core finishes
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  if(gccorestatus[gctopcore] != 0) {
    GC_BAMBOO_DEBUGPRINT(0xd101);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    // let the top core finish its own work first
    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    return;
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

  GC_BAMBOO_DEBUGPRINT(0xd102);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(p);
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(remain);
  for(int i = 0; i < NUMCORES4GC; i++) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
      GC_BAMBOO_DEBUGPRINT(0xd103);
      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
      if(gccorestatus[gctopcore] != 0) {
        GC_BAMBOO_DEBUGPRINT(0xd101);
        GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        // the top core is not free now
        return;
      }
    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xd106);
} // void compact2Heaptop()
inline void resolvePendingMoveRequest() {
  GC_BAMBOO_DEBUGPRINT(0xeb01);
  GC_BAMBOO_DEBUGPRINT(0xeeee);
  for(int k = 0; k < NUMCORES4GC; k++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+k);
    GC_BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
  }
  GC_BAMBOO_DEBUGPRINT(0xffff);
  int i;
  int j;
  bool nosparemem = true;
  bool haspending = false;
  bool hasrunning = false;
  bool noblock = false;
  int dstcore = 0;       // the core that needs spare mem
  int sourcecore = 0;    // the core that has spare mem
  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
    if(nosparemem) {
      // check if there are cores with spare mem
      if(gccorestatus[i] == 0) {
        // finished working, check if it still has spare mem
        if(gcfilledblocks[i] < gcstopblock[i]) {
          // still has spare mem
          nosparemem = false;
          sourcecore = i;
        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
      }
      i++;
    } // if(nosparemem)
    if(!haspending) {
      if(gccorestatus[j] != 0) {
        // not finished, check if it has pending move requests
        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
          dstcore = j;
          haspending = true;
        } else {
          hasrunning = true;
        } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
      } // if(gccorestatus[i] == 0) else ...
      j++;
    } // if(!haspending)
    if(!nosparemem && haspending) {
      // find match
      int tomove = 0;
      int startaddr = 0;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
                                                 gcrequiredmems[dstcore],
                                                 &tomove,
                                                 &startaddr);
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xeb02);
      GC_BAMBOO_DEBUGPRINT_REG(sourcecore);
      GC_BAMBOO_DEBUGPRINT_REG(dstcore);
      GC_BAMBOO_DEBUGPRINT_REG(startaddr);
      GC_BAMBOO_DEBUGPRINT_REG(tomove);
      if(STARTUPCORE == dstcore) {
        GC_BAMBOO_DEBUGPRINT(0xeb03);
        gcdstcore = sourcecore;
        gctomove = true;
        gcmovestartaddr = startaddr;
        gcblock2fill = tomove;
      } else {
        GC_BAMBOO_DEBUGPRINT(0xeb04);
        send_msg_4(dstcore, GCMOVESTART, sourcecore,
                   startaddr, tomove, false);
      }
      gcmovepending--;
      nosparemem = true;
      haspending = false;
      noblock = true;
    }
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xcccc);
  GC_BAMBOO_DEBUGPRINT_REG(hasrunning);
  GC_BAMBOO_DEBUGPRINT_REG(haspending);
  GC_BAMBOO_DEBUGPRINT_REG(noblock);

  if(!hasrunning && !noblock) {
    gcphase = SUBTLECOMPACTPHASE;
    compact2Heaptop();
  }
} // void resolvePendingMoveRequest()
struct moveHelper {
  int numblocks;       // block num for heap
  INTPTR base;         // base virtual address of current heap block
  INTPTR ptr;          // virtual address of current heap top
  int offset;          // offset in current heap block
  int blockbase;       // virtual address of current small block to check
  int blockbound;      // bound virtual address of current small block
  int sblockindex;     // index of the small blocks
  int top;             // real size of current heap block to check
  int bound;           // bound size of current heap block to check
}; // struct moveHelper
// If out of boundary of valid shared memory, return false, else return true
inline bool nextSBlock(struct moveHelper * orig) {
  orig->blockbase = orig->blockbound;

  bool sbchanged = false;
  INTPTR origptr = orig->ptr;
  int blockbase = orig->blockbase;
  int blockbound = orig->blockbound;
  int bound = orig->bound;
  GC_BAMBOO_DEBUGPRINT(0xecc0);
  GC_BAMBOO_DEBUGPRINT_REG(blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(bound);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
outernextSBlock:
  // refresh the cached fields in case we looped back here
  origptr = orig->ptr;
  blockbase = orig->blockbase;
  bound = orig->bound;
  // check if across a big block
  // TODO now do not zero out the whole memory, maybe the last two conditions
  // are useless now
  if((blockbase>=bound)||(origptr>=bound)
     ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
innernextSBlock:
    // end of current heap block, jump to next one
    orig->numblocks++;
    GC_BAMBOO_DEBUGPRINT(0xecc1);
    GC_BAMBOO_DEBUGPRINT_REG(orig->numblocks);
    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
    GC_BAMBOO_DEBUGPRINT(orig->base);
    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
      // out of boundary
      orig->ptr = orig->base; // set current ptr to out of boundary too
      return false;
    }
    orig->blockbase = orig->base;
    orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
    sbchanged = true;
    int blocknum = 0;
    BLOCKINDEX(orig->base, &blocknum);
    if(bamboo_smemtbl[blocknum] == 0) {
      // goto next block
      goto innernextSBlock;
    }
    // check the bamboo_smemtbl to decide the real bound
    orig->bound = orig->base + bamboo_smemtbl[blocknum];
  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
    orig->sblockindex += 1;
    sbchanged = true;
  } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...

  // check if this sblock should be skipped or have special start point
  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    // goto next sblock
    GC_BAMBOO_DEBUGPRINT(0xecc2);
    orig->sblockindex += 1;
    orig->blockbase += BAMBOO_SMEM_SIZE;
    goto outernextSBlock;
  } else if((sbstart != 0) && (sbchanged)) {
    // the first time to access this SBlock
    GC_BAMBOO_DEBUGPRINT(0xecc3);
    // not start from the very beginning
    orig->blockbase = sbstart;
  } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...

  // setup information for this sblock
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xecc4);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->offset);
  if(orig->ptr >= orig->bound) {
    // met a lobj, move to next block
    goto innernextSBlock;
  }

  return true;
} // bool nextSBlock(struct moveHelper * orig)
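/*
 * Editor's note: compaction walks two granularities at once. A "block" is a
 * per-core allocation unit (BAMBOO_SMEM_SIZE_L for the first NUMCORES4GC
 * blocks, BAMBOO_SMEM_SIZE after) whose used size comes from bamboo_smemtbl;
 * an "sblock" is a BAMBOO_SMEM_SIZE-sized slot whose gcsbstarttbl entry is
 * -1 when the slot is swallowed by a large object, 0 when scanning starts at
 * its natural base, or else the address where scanning must begin.
 */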
// return false if there are no available data to compact
inline bool initOrig_Dst(struct moveHelper * orig,
                         struct moveHelper * to) {
  // init the dst ptr
  to->numblocks = 0;
  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->bound = BAMBOO_SMEM_SIZE_L;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));

  GC_BAMBOO_DEBUGPRINT(0xef01);
  GC_BAMBOO_DEBUGPRINT_REG(to->base);
  INTPTR tobase = to->base;
  to->ptr = tobase + to->offset;
#ifdef GC_CACHE_ADAPT
  // initialize the gc_cache_revise_information
  gc_cache_revise_infomation.to_page_start_va = to->ptr;
  int toindex = (tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
    (toindex+1) + gcbaseva;
  gc_cache_revise_infomation.to_page_index = toindex;
  gc_cache_revise_infomation.orig_page_start_va = -1;
#endif // GC_CACHE_ADAPT

  // init the orig ptr
  orig->numblocks = 0;
  orig->base = tobase;
  int blocknum = 0;
  BLOCKINDEX(orig->base, &blocknum);
  INTPTR origbase = orig->base;
  // check the bamboo_smemtbl to decide the real bound
  orig->bound = origbase + bamboo_smemtbl[blocknum];
  orig->blockbase = origbase;
  orig->sblockindex = (origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
  GC_BAMBOO_DEBUGPRINT(0xef02);
  GC_BAMBOO_DEBUGPRINT_REG(origbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);

  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    GC_BAMBOO_DEBUGPRINT(0xef03);
    // goto next sblock
    orig->blockbound =
      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
    return nextSBlock(orig);
  } else if(sbstart != 0) {
    GC_BAMBOO_DEBUGPRINT(0xef04);
    orig->blockbase = sbstart;
  }
  GC_BAMBOO_DEBUGPRINT(0xef05);
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xef06);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);

  return true;
} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
inline void nextBlock(struct moveHelper * to) {
  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
  to->bound += BAMBOO_SMEM_SIZE;
  to->numblocks++;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
  to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->ptr = to->base + to->offset;
} // void nextBlock(struct moveHelper * to)
#ifdef GC_CACHE_ADAPT
inline void samplingDataConvert(int current_ptr) {
  unsigned int tmp_factor =
    current_ptr-gc_cache_revise_infomation.to_page_start_va;
  int topage=gc_cache_revise_infomation.to_page_index;
  int oldpage = gc_cache_revise_infomation.orig_page_index;
  unsigned int * newtable=&gccachesamplingtbl_r[topage];
  unsigned int * oldtable=&gccachesamplingtbl[oldpage];

  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
    (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
    newtable=(unsigned int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
    oldtable=(unsigned int*) (((char *)oldtable)+size_cachesamplingtbl_local);
  }
} // inline void samplingDataConvert(int)
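/*
 * Editor's note: tmp_factor is the number of bytes already copied into the
 * current destination page, so each source page's per-core access sample is
 * folded into the destination page's row weighted by how much of that page
 * the moved data now occupies. gccachesamplingtbl_r accumulates the revised
 * profile that the cache-adaptation phase reads after compaction.
 */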
inline void completePageConvert(struct moveHelper * orig,
                                struct moveHelper * to,
                                int current_ptr,
                                bool closeToPage) {
  INTPTR ptr = 0;
  int tocompare = 0;
  if(closeToPage) {
    ptr = to->ptr;
    tocompare = gc_cache_revise_infomation.to_page_end_va;
  } else {
    ptr = orig->ptr;
    tocompare = gc_cache_revise_infomation.orig_page_end_va;
  }
  if(ptr >= tocompare) {
    // end of an orig/to page
    // compute the impact of this page on the new page
    samplingDataConvert(current_ptr);
    // prepare for a new orig page
    int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*(tmp_index+1);
    gc_cache_revise_infomation.orig_page_index = tmp_index;
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
    if(closeToPage) {
      gc_cache_revise_infomation.to_page_end_va = gcbaseva +
        (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
      gc_cache_revise_infomation.to_page_index =
        (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    }
  }
} // inline void completePageConvert(...)
#endif // GC_CACHE_ADAPT
// endaddr does not contain spaces for headers
inline bool moveobj(struct moveHelper * orig,
                    struct moveHelper * to,
                    int stopblock) {
  if(stopblock == 0) {
    return true;
  }

  GC_BAMBOO_DEBUGPRINT(0xe201);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(to->ptr);

  int type = 0;
  int size = 0;
  int mark = 0;
  int isize = 0;
innermoveobj:
  // skip -2 padding bytes
  while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
    orig->ptr = (int*)(orig->ptr) + 1;
  }
#ifdef GC_CACHE_ADAPT
  completePageConvert(orig, to, to->ptr, false);
#endif
  INTPTR origptr = orig->ptr;
  int origbound = orig->bound;
  int origblockbound = orig->blockbound;
  if((origptr >= origbound) || (origptr == origblockbound)) {
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  }
  GC_BAMBOO_DEBUGPRINT(0xe202);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT(((int *)(origptr))[0]);
  // check the obj's type, size and mark flag
  type = ((int *)(origptr))[0];
  size = 0;
  if(type == 0) {
    // end of this block, go to next one
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  } else if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)(origptr);
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  }
  mark = ((int *)(origptr))[6];
  bool isremote = ((((int *)(origptr))[6] & REMOTEM) != 0);
  GC_BAMBOO_DEBUGPRINT(0xe203);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  ALIGNSIZE(size, &isize); // no matter whether the obj is marked or not,
                           // we should be able to cross it
  if((mark & MARKED) != 0) {
    int totop = to->top;
    int tobound = to->bound;
    GC_BAMBOO_DEBUGPRINT(0xe204);
#ifdef GC_PROFILE
    gc_num_liveobj++;
#endif
    // marked obj, copy it to current heap top
    // check to see if remaining space is enough
    if(totop + isize > tobound) {
      // fill 0 indicating the end of this block
      BAMBOO_MEMSET_WH(to->ptr, '\0', tobound - totop);
      // fill the header of this block and then go to next block
      to->offset += tobound - totop;
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
#ifdef GC_CACHE_ADAPT
      int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
      nextBlock(to);
#ifdef GC_CACHE_ADAPT
      completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
      if(stopblock == to->numblocks) {
        // already fulfilled the block
        return true;
      } // if(stopblock == to->numblocks)
    } // if(to->top + isize > to->bound)
    // set the mark field to 2, indicating that this obj has been moved
    // and needs to be flushed
    ((int *)(origptr))[6] = COMPACTED;
    INTPTR toptr = to->ptr;
    if(toptr != origptr) {
      if((int)(origptr) < (int)(toptr)+size) {
        memmove(toptr, origptr, size);
      } else {
        memcpy(toptr, origptr, size);
      }
      // fill the remaining space with -2
      BAMBOO_MEMSET_WH(toptr+size, -2, isize-size);
    }
    // store mapping info
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
    RuntimeHashadd_I(gcpointertbl, origptr, toptr);
#else
    mgchashInsert_I(gcpointertbl, origptr, toptr);
#endif
    if(isremote) {
      // add to the sharedptbl
      if(gcsharedptbl != NULL) {
        mgcsharedhashInsert_I(gcsharedptbl, origptr, toptr);
      }
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    GC_BAMBOO_DEBUGPRINT(0xcdce);
    GC_BAMBOO_DEBUGPRINT_REG(origptr);
    GC_BAMBOO_DEBUGPRINT_REG(toptr);
    GC_BAMBOO_DEBUGPRINT_REG(isize);
    gccurr_heaptop -= isize;
    to->ptr += isize;
    to->offset += isize;
    to->top += isize;
#ifdef GC_CACHE_ADAPT
    int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
    if(to->top == to->bound) {
      // fill the header of this block and then go to next block
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
      nextBlock(to);
    }
#ifdef GC_CACHE_ADAPT
    completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
  } // if((mark & MARKED) != 0)
  GC_BAMBOO_DEBUGPRINT(0xe205);

  // move to the next obj
  orig->ptr += isize;

  GC_BAMBOO_DEBUGPRINT_REG(isize);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
    GC_BAMBOO_DEBUGPRINT(0xe206);
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe207);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  return false;
} //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int stopblock)
// should be invoked with interrupts disabled
inline int assignSpareMem_I(int sourcecore,
                            int requiredmem,
                            int * tomove,
                            int * startaddr) {
  int b = 0;
  BLOCKINDEX(gcloads[sourcecore], &b);
  int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
      : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
  int remain = boundptr - gcloads[sourcecore];
  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
  *startaddr = gcloads[sourcecore];
  *tomove = gcfilledblocks[sourcecore] + 1;
  if(memneed < remain) {
    gcloads[sourcecore] += memneed;
    return 0;
  } else {
    // next available block
    gcfilledblocks[sourcecore] += 1;
    int newbase = 0;
    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
    gcloads[sourcecore] = newbase;
    return requiredmem-remain;
  }
} // int assignSpareMem_I(int, int, int *, int *)
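/*
 * Worked example (editor's sketch, hypothetical numbers): if sourcecore's
 * next free byte sits 3KB below its current block bound (remain = 3KB) and
 * the requester needs 2KB plus the cache-line header, memneed < remain, so
 * the whole request is satisfied in place and 0 is returned. Otherwise the
 * source core is advanced to its next block and the unsatisfied remainder
 * (requiredmem - remain) is handed back for the caller to keep pending.
 */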
// should be invoked with interrupts disabled
inline bool gcfindSpareMem_I(int * startaddr,
                             int * tomove,
                             int * dstcore,
                             int requiredmem,
                             int requiredcore) {
  for(int k = 0; k < NUMCORES4GC; k++) {
    if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
      // check if this stopped core has enough mem
      assignSpareMem_I(k, requiredmem, tomove, startaddr);
      *dstcore = k;
      return true;
    }
  }
  // if can not find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  gcmovepending++;
  return false;
} //bool gcfindSpareMem_I(int* startaddr,int* tomove,int* dstcore,int,int)
1911 inline bool compacthelper(struct moveHelper * orig,
1912 struct moveHelper * to,
1915 bool * localcompact) {
1916 // scan over all objs in this block, compact the marked objs
1917 // loop stop when finishing either scanning all active objs or
1918 // fulfilled the gcstopblock
1919 GC_BAMBOO_DEBUGPRINT(0xe101);
1920 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
1921 GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1923 while(orig->ptr < gcmarkedptrbound) {
1924 bool stop = moveobj(orig, to, gcblock2fill);
1929 #ifdef GC_CACHE_ADAPT
1930 // end of an to page, wrap up its information
1931 samplingDataConvert(to->ptr);
1932 #endif // GC_CACHE_ADAPT
1933 // if no objs have been compact, do nothing,
1934 // otherwise, fill the header of this block
1935 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
1936 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1937 (*((int*)(to->base))) = to->offset;
1941 to->top -= BAMBOO_CACHE_LINE_SIZE;
1942 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
1944 *heaptopptr = to->ptr;
1945 *filledblocks = to->numblocks;
1947 GC_BAMBOO_DEBUGPRINT(0xe102);
1948 GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1950 GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1951 GC_BAMBOO_DEBUGPRINT_REG(*filledblocks);
1952 GC_BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
1954 // send msgs to core coordinator indicating that the compact is finishing
1955 // send compact finish message to core coordinator
1956 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1957 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
1958 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
1959 if(orig->ptr < gcmarkedptrbound) {
1960 GC_BAMBOO_DEBUGPRINT(0xe103);
1963 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1964 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
1965 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
1966 GC_BAMBOO_DEBUGPRINT(0xe104);
1969 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1970 GC_BAMBOO_DEBUGPRINT(0xe105);
1973 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1975 GC_BAMBOO_DEBUGPRINT(0xe106);
1976 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1981 if(orig->ptr < gcmarkedptrbound) {
1982 GC_BAMBOO_DEBUGPRINT(0xe107);
1985 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1986 *filledblocks, *heaptopptr, gccurr_heaptop, false);
1988 GC_BAMBOO_DEBUGPRINT(0xe108);
1989 GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1990 // finish compacting
1991 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1992 *filledblocks, *heaptopptr, 0, false);
1994 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
1996 if(orig->ptr < gcmarkedptrbound) {
1997 GC_BAMBOO_DEBUGPRINT(0xe109);
1998 // still have unpacked obj
2006 GC_BAMBOO_DEBUGPRINT(0xe10a);
2008 to->ptr = gcmovestartaddr;
2009 to->numblocks = gcblock2fill - 1;
2010 to->bound = (to->numblocks==0) ?
2011 BAMBOO_SMEM_SIZE_L :
2012 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2013 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2014 to->offset = to->ptr - to->base;
2015 to->top = (to->numblocks==0) ?
2016 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2018 to->offset = BAMBOO_CACHE_LINE_SIZE;
2019 to->ptr += to->offset; // for header
2020 to->top += to->offset;
2021 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2022 *localcompact = true;
2024 *localcompact = false;
2026 #ifdef GC_CACHE_ADAPT
2027 // initialize the gc_cache_revise_information
2028 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2029 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2030 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2031 gc_cache_revise_infomation.to_page_index =
2032 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2033 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2034 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2035 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2036 gc_cache_revise_infomation.orig_page_index =
2037 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2038 #endif // GC_CACHE_ADAPT
2041 GC_BAMBOO_DEBUGPRINT(0xe10b);
2043 } // bool compacthelper(...)
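/* Illustrative sketch (not used by the collector) of how compacthelper()
 * derives a destination window when the master hands it a new start
 * address: the first block of a core's heap is BAMBOO_SMEM_SIZE_L bytes,
 * every later block BAMBOO_SMEM_SIZE, and each block is prefixed by a
 * BAMBOO_CACHE_LINE_SIZE header. The struct and parameters below are
 * local stand-ins for the real macros and moveHelper fields. */
struct to_window_sketch {
  int ptr, base, bound, top, offset, numblocks;
};
static void init_to_window_sketch(struct to_window_sketch * to,
                                  int startaddr, int block2fill, int base,
                                  int smem_l, int smem, int cacheline) {
  to->ptr = startaddr;
  to->numblocks = block2fill - 1;
  to->bound = (to->numblocks == 0) ? smem_l : smem_l + smem * to->numblocks;
  to->base = base;                 // the real code uses BASEPTR(core, ...)
  to->offset = to->ptr - to->base;
  to->top = (to->numblocks == 0)
            ? to->offset                       // still in the large block
            : to->bound - smem + to->offset;   // into a regular block
  to->offset = cacheline;          // reserve room for the block header
  to->ptr += to->offset;
  to->top += to->offset;
}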
2045 inline void compact() {
2046 if(COMPACTPHASE != gcphase) {
2047 BAMBOO_EXIT(0xb002);
2050 // initialize pointers for compacting
2051 struct moveHelper * orig =
2052 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2053 struct moveHelper * to =
2054 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2056 if(!initOrig_Dst(orig, to)) {
2057 // no available data to compact
2058 // send compact finish msg to STARTUP core
2059 GC_BAMBOO_DEBUGPRINT(0xe001);
2060 GC_BAMBOO_DEBUGPRINT_REG(to->base);
2061 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2062 0, to->base, 0, false);
2067 #ifdef GC_CACHE_ADAPT
2068 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2069 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2070 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2071 gc_cache_revise_infomation.orig_page_index =
2072 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2073 #endif // GC_CACHE_ADAPT
2075 int filledblocks = 0;
2076 INTPTR heaptopptr = 0;
2077 bool localcompact = true;
2078 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2084 // if this returns NULL, it means
2085 // 1. objptr is NULL, or
2086 // 2. objptr is not a shared obj;
2087 // in these cases, keeping the original value is OK
2088 inline void * flushObj(void * objptr) {
2089 GC_BAMBOO_DEBUGPRINT(0xe401);
2090 if(objptr == NULL) {
2093 void * dstptr = NULL;
2094 if(ISSHAREDOBJ(objptr)) {
2095 GC_BAMBOO_DEBUGPRINT(0xe402);
2096 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2097 // a shared obj ptr, change to new address
2098 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2099 #ifdef LOCALHASHTBL_TEST
2100 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2102 dstptr = mgchashSearch(gcpointertbl, objptr);
2104 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2105 GC_BAMBOO_DEBUGPRINT_REG(dstptr);
2107 if(NULL == dstptr) {
2109 GC_BAMBOO_DEBUGPRINT(0xe403);
2110 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2111 GC_BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2112 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2113 // error! the obj is hosted right on this core, but it cannot be found
2114 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2115 BAMBOO_EXIT(0xb003);
2117 int hostc = hostcore(objptr);
2118 // check the corresponding sharedptbl
2119 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2120 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2122 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2123 if(dstptr != NULL) {
2124 #ifdef LOCALHASHTBL_TEST
2125 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2127 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2131 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2133 if(dstptr == NULL) {
2134 // still cannot get the mapping info;
2135 // send a msg to the host core for it
2136 gcobj2map = (int)objptr;
2139 // the first time the mapping is required, send a msg to the
2140 // host core for the mapping info
2141 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2142 BAMBOO_NUM_OF_CORE, false);
2148 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2149 #ifdef LOCALHASHTBL_TEST
2150 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2152 dstptr = mgchashSearch(gcpointertbl, objptr);
2154 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2155 } // if(dstptr == NULL)
2156 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2157 GC_BAMBOO_DEBUGPRINT_REG(dstptr);
2158 } // if(NULL == dstptr)
2159 } // if(ISSHAREDOBJ(objptr))
2160 // if not a shared obj, return NULL to indicate no need to flush
2161 GC_BAMBOO_DEBUGPRINT(0xe404);
2163 } // void * flushObj(void * objptr)
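/* Self-contained sketch of the lookup order used by flushObj() above;
 * the two arrays stand in for the core-local gcpointertbl and the host
 * core's sharedptbl. Step 3 (a GCMAPREQUEST message to the host core,
 * spinning until the reply arrives) is only noted in a comment since it
 * needs the messaging runtime. */
#define SKETCH_TBL_SIZE 4
static void * sketch_keys[2][SKETCH_TBL_SIZE];
static void * sketch_vals[2][SKETCH_TBL_SIZE];

static void * sketch_search(int tbl, void * key) {
  for(int i = 0; i < SKETCH_TBL_SIZE; i++) {
    if(sketch_keys[tbl][i] == key) return sketch_vals[tbl][i];
  }
  return NULL;
}

static void * lookup_mapping_sketch(void * objptr) {
  // step 1: the core-local pointer table
  void * dstptr = sketch_search(0, objptr);
  if(dstptr == NULL) {
    // step 2: the host core's shared table; a hit is cached locally
    dstptr = sketch_search(1, objptr);
    // step 3 (not shown): send GCMAPREQUEST to hostcore(objptr) and
    // wait until the reply fills the local table
  }
  return dstptr;  // NULL: no mapping, caller keeps the original pointer
}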
2165 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2167 // flush current stack
2168 while(stackptr!=NULL) {
2169 for(i=0; i<stackptr->size; i++) {
2170 if(stackptr->array[i] != NULL) {
2171 void * dst = flushObj(stackptr->array[i]);
2173 stackptr->array[i] = dst;
2177 stackptr=stackptr->next;
2181 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2182 for(i=0; i<NUMCLASSES; i++) {
2183 struct parameterwrapper ** queues =
2184 objectqueues[BAMBOO_NUM_OF_CORE][i];
2185 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2186 for(j = 0; j < length; ++j) {
2187 struct parameterwrapper * parameter = queues[j];
2188 struct ObjectHash * set=parameter->objectset;
2189 struct ObjectNode * ptr=set->listhead;
2191 void * dst = flushObj((void *)ptr->key);
2197 ObjectHashrehash(set);
2202 // flush current task descriptor
2203 if(currtpd != NULL) {
2204 for(i=0; i<currtpd->numParameters; i++) {
2205 void * dst = flushObj(currtpd->parameterArray[i]);
2207 currtpd->parameterArray[i] = dst;
2212 // flush active tasks
2213 if(activetasks != NULL) {
2214 struct genpointerlist * ptr=activetasks->list;
2216 struct taskparamdescriptor *tpd=ptr->src;
2218 for(i=0; i<tpd->numParameters; i++) {
2219 void * dst = flushObj(tpd->parameterArray[i]);
2221 tpd->parameterArray[i] = dst;
2226 genrehash(activetasks);
2229 // flush cached transferred obj
2230 struct QueueItem * tmpobjptr = getHead(&objqueue);
2231 while(tmpobjptr != NULL) {
2232 struct transObjInfo * objInfo =
2233 (struct transObjInfo *)(tmpobjptr->objectptr);
2234 void * dst = flushObj(objInfo->objptr);
2236 objInfo->objptr = dst;
2238 tmpobjptr = getNextQueueItem(tmpobjptr);
2241 // flush cached objs to be transferred
2242 struct QueueItem * item = getHead(totransobjqueue);
2243 while(item != NULL) {
2244 struct transObjInfo * totransobj =
2245 (struct transObjInfo *)(item->objectptr);
2246 void * dst = flushObj(totransobj->objptr);
2248 totransobj->objptr = dst;
2250 item = getNextQueueItem(item);
2251 } // while(item != NULL)
2253 // flush lock related info
2254 for(i = 0; i < runtime_locklen; ++i) {
2255 void * dst = flushObj(runtime_locks[i].redirectlock);
2257 runtime_locks[i].redirectlock = (int)dst;
2259 if(runtime_locks[i].value != NULL) {
2260 void * dst=flushObj(runtime_locks[i].value);
2262 runtime_locks[i].value = (int)dst;
2267 } // void flushRuntimeObj(struct garbagelist * stackptr)
2269 inline void transmappinginfo() {
2270 // broadcast the sharedptbl pointer
2271 for(int i = 0; i < NUMCORESACTIVE; i++) {
2272 if(i != BAMBOO_NUM_OF_CORE) {
2273 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2277 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2278 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
2282 inline void flush(struct garbagelist * stackptr) {
2284 flushRuntimeObj(stackptr);
2287 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2288 bool hasItems = gc_moreItems_I();
2289 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2294 GC_BAMBOO_DEBUGPRINT(0xe301);
2295 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2296 void * ptr = gc_dequeue_I();
2297 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2298 if(ISSHAREDOBJ(ptr)) {
2299 // should be a local shared obj and should have mapping info
2300 ptr = flushObj(ptr);
2301 GC_BAMBOO_DEBUGPRINT(0xe302);
2302 GC_BAMBOO_DEBUGPRINT_REG(ptr);
2303 GC_BAMBOO_DEBUGPRINT_REG(tptr);
2304 GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2306 BAMBOO_EXIT(0xb004);
2308 } // if(ISSHAREDOBJ(ptr))
2309 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2310 int type = ((int *)(ptr))[0];
2311 // scan all pointers in ptr
2312 unsigned INTPTR * pointer;
2313 pointer=pointerarray[type];
2314 GC_BAMBOO_DEBUGPRINT(0xe303);
2315 GC_BAMBOO_DEBUGPRINT_REG(pointer);
2317 /* Array of primitives */
2319 } else if (((INTPTR)pointer)==1) {
2320 GC_BAMBOO_DEBUGPRINT(0xe304);
2321 /* Array of pointers */
2322 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2323 int length=ao->___length___;
2325 for(j=0; j<length; j++) {
2326 GC_BAMBOO_DEBUGPRINT(0xe305);
2328 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2329 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2330 if(objptr != NULL) {
2331 void * dst = flushObj(objptr);
2333 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2338 GC_BAMBOO_DEBUGPRINT(0xe306);
2339 INTPTR size=pointer[0];
2341 for(i=1; i<=size; i++) {
2342 GC_BAMBOO_DEBUGPRINT(0xe307);
2343 unsigned int offset=pointer[i];
2344 void * objptr=*((void **)(((char *)ptr)+offset));
2345 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2346 if(objptr != NULL) {
2347 void * dst = flushObj(objptr);
2349 *((void **)(((char *)ptr)+offset)) = dst;
2352 } // for(i=1; i<=size; i++)
2353 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2354 // restore the mark field, indicating that this obj has been flushed
2355 if(ISSHAREDOBJ(ptr)) {
2356 ((int *)(ptr))[6] = INIT;
2358 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2359 } // while(gc_moreItems())
2360 GC_BAMBOO_DEBUGPRINT(0xe308);
2362 // TODO bug here: the startup core contains all lobjs' info, thus all the
2363 // lobjs are flushed in sequence.
2365 while(gc_lobjmoreItems_I()) {
2366 GC_BAMBOO_DEBUGPRINT(0xe309);
2367 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2368 ptr = flushObj(ptr);
2369 GC_BAMBOO_DEBUGPRINT(0xe30a);
2370 GC_BAMBOO_DEBUGPRINT_REG(ptr);
2371 GC_BAMBOO_DEBUGPRINT_REG(tptr);
2372 GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2374 BAMBOO_EXIT(0xb005);
2376 if(((int *)(ptr))[6] == COMPACTED) {
2377 int type = ((int *)(ptr))[0];
2378 // scan all pointers in ptr
2379 unsigned INTPTR * pointer;
2380 pointer=pointerarray[type];
2381 GC_BAMBOO_DEBUGPRINT(0xe30b);
2382 GC_BAMBOO_DEBUGPRINT_REG(pointer);
2384 /* Array of primitives */
2386 } else if (((INTPTR)pointer)==1) {
2387 GC_BAMBOO_DEBUGPRINT(0xe30c);
2388 /* Array of pointers */
2389 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2390 int length=ao->___length___;
2392 for(j=0; j<length; j++) {
2393 GC_BAMBOO_DEBUGPRINT(0xe30d);
2395 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2396 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2397 if(objptr != NULL) {
2398 void * dst = flushObj(objptr);
2400 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2405 GC_BAMBOO_DEBUGPRINT(0xe30e);
2406 INTPTR size=pointer[0];
2408 for(i=1; i<=size; i++) {
2409 GC_BAMBOO_DEBUGPRINT(0xe30f);
2410 unsigned int offset=pointer[i];
2411 void * objptr=*((void **)(((char *)ptr)+offset));
2413 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2414 if(objptr != NULL) {
2415 void * dst = flushObj(objptr);
2417 *((void **)(((char *)ptr)+offset)) = dst;
2420 } // for(i=1; i<=size; i++)
2421 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2422 // restore the mark field, indicating that this obj has been flushed
2423 ((int *)(ptr))[6] = INIT;
2424 } // if(((int *)(ptr))[6] == COMPACTED)
2425 } // while(gc_lobjmoreItems())
2426 GC_BAMBOO_DEBUGPRINT(0xe310);
2428 // send flush finish message to core coordinator
2429 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2430 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2432 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2434 GC_BAMBOO_DEBUGPRINT(0xe311);
2437 #ifdef GC_CACHE_ADAPT
2438 // prepare for cache adaptation:
2439 // -- flush the shared heap
2440 // -- clean dtlb entries
2441 // -- change cache strategy
2442 void cacheAdapt_gc(bool isgccachestage) {
2443 // flush the shared heap
2444 BAMBOO_CACHE_FLUSH_L2();
2446 // clean the dtlb entries
2447 BAMBOO_CLEAN_DTLB();
2449 // change the cache strategy
2450 gccachestage = isgccachestage;
2451 } // cacheAdapt_gc(bool isgccachestage)
2453 // the master core decides how to adapt cache strategy for the mutator
2454 // according to the collected statistics
2456 // make all pages hash-for-home (h4h)
2457 int cacheAdapt_policy_h4h(){
2458 unsigned int page_index = 0;
2460 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2462 int * tmp_p = gccachepolicytbl+1;
2463 for(page_index = 0; page_index < page_num; page_index++) {
2464 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2465 bamboo_cache_policy_t policy = {0};
2466 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2467 *tmp_p = page_index;
2469 *tmp_p = policy.word;
2475 } // int cacheAdapt_policy_h4h()
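/* Illustrative sketch of the policy-table layout shared by all the
 * cacheAdapt_policy_* functions and consumed by cacheAdapt_mutator():
 * slot 0 holds the number of records, then each record is a
 * (page_index, policy word) pair. Built here over a plain int array
 * with an assumed policy word. */
static int build_policy_tbl_sketch(int * tbl, unsigned int page_num,
                                   int policy_word) {
  int * tmp_p = tbl + 1;           // slot 0 is reserved for the count
  int numchanged = 0;
  for(unsigned int page_index = 0; page_index < page_num; page_index++) {
    *tmp_p++ = (int)page_index;    // which page changes ...
    *tmp_p++ = policy_word;        // ... and its new policy word
    numchanged++;
  }
  tbl[0] = numchanged;             // what cacheAdapt_mutator() reads back
  return numchanged;
}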
2477 // cache all pages on their home cores, as in the non-cache-adaptive local mode
2478 int cacheAdapt_policy_local(){
2479 unsigned int page_index = 0;
2481 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2483 int * tmp_p = gccachepolicytbl+1;
2484 for(page_index = 0; page_index < page_num; page_index++) {
2485 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2486 bamboo_cache_policy_t policy = {0};
2488 BLOCKINDEX(page_sva, &block);
2489 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2490 // cache the page locally on its home core
2491 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2492 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2493 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2494 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2495 *tmp_p = page_index;
2497 *tmp_p = policy.word;
2503 } // int cacheAdapt_policy_local()
2505 int cacheAdapt_policy_hotest(){
2506 unsigned int page_index = 0;
2508 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2510 int * tmp_p = gccachepolicytbl+1;
2511 for(page_index = 0; page_index < page_num; page_index++) {
2512 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2513 bamboo_cache_policy_t policy = {0};
2515 unsigned int hotfreq = 0;
2517 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2518 for(int i = 0; i < NUMCORESACTIVE; i++) {
2519 unsigned int freq = *local_tbl;
2520 local_tbl=(unsigned int *)(
2521 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2523 // check the frequency and decide if this page is hot for the core
2524 if(hotfreq < freq) {
2530 // Decide the cache strategy for this page.
2531 // If a new cache strategy is adopted, write it into the shared block of
2532 // the gcsharedsamplingtbl; the mem recording the written information is
2533 // large enough to hold it.
2534 // Format: page start va + cache strategy (h4h / host core (x,y))
2536 // this page has not been accessed, do not change its cache policy
2539 // cache the page locally on the hottest core
2540 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2541 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2542 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2543 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2544 *tmp_p = page_index;
2546 *tmp_p = policy.word;
2553 } // int cacheAdapt_policy_hotest()
2555 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
2556 // cache the page on the core that accesses it the most, if that core accesses
2557 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total; otherwise,
2558 // hash the page for home.
2559 int cacheAdapt_policy_dominate(){
2560 unsigned int page_index = 0;
2562 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2564 int * tmp_p = gccachepolicytbl+1;
2565 for(page_index = 0; page_index < page_num; page_index++) {
2566 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2567 bamboo_cache_policy_t policy = {0};
2569 unsigned long long totalfreq = 0;
2570 unsigned int hotfreq = 0;
2572 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2573 for(int i = 0; i < NUMCORESACTIVE; i++) {
2574 unsigned int freq = *local_tbl;
2575 local_tbl=(unsigned int *)(
2576 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2578 // check the frequency and decide if this page is hot for the core
2579 if(hotfreq < freq) {
2585 // Decide the cache strategy for this page.
2586 // If a new cache strategy is adopted, write it into the shared block of
2587 // the gcsharedsamplingtbl.
2588 // Format: page start va + cache policy
2590 // this page has not been accessed, do not change its cache policy
2594 (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
2595 hotfreq/=BAMBOO_PAGE_SIZE;
2596 if(hotfreq < totalfreq) {
2598 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2600 // cache the page locally on the hottest core
2601 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2602 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2603 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2604 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2606 *tmp_p = page_index;
2608 *tmp_p = policy.word;
2614 } // int cacheAdapt_policy_dominate()
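/* Worked example for the dominate test above (numbers invented): with
 * GC_CACHE_ADAPT_DOMINATE_THRESHOLD = 50, totalfreq = 1000 and
 * hotfreq = 450 (both already scaled down by BAMBOO_PAGE_SIZE), the
 * bound is 1000*50/100 = 500; since 450 < 500 no single core dominates
 * the page, so it is hash-for-homed instead of being homed on the
 * hottest core. The helper below mirrors that comparison, minus the
 * page-size scaling. */
static int dominates_sketch(unsigned long long totalfreq,
                            unsigned long long hotfreq,
                            unsigned int threshold_pct) {
  // nonzero: home the page on the hottest core; zero: hash-for-home it
  return hotfreq >= (totalfreq * threshold_pct) / 100;
}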
2616 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
2618 void gc_quicksort(unsigned long long *array,
2624 int rightIdx = right;
2625 if((right-left+1) >= 1) {
2626 pivot = (left+right)/2;
2627 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
2628 unsigned long long pivotValue = array[pivot*3-offset];
2629 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
2632 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
2635 // swap [leftIdx] & [rightIdx]
2636 for(int k = 0; k < 3; k++) {
2637 unsigned long long tmp = array[3*rightIdx-k];
2638 array[3*rightIdx-k] = array[3*leftIdx-k];
2639 array[3*leftIdx-k] = tmp;
2643 if((leftIdx-1) == pivot) {
2644 pivot = rightIdx = rightIdx + 1;
2645 } else if((leftIdx+1) == pivot) {
2646 pivot = leftIdx = leftIdx-1;
2649 gc_quicksort(array, left, pivot-1, offset);
2650 gc_quicksort(array, pivot+1, right, offset);
2653 } // void gc_quicksort(...)
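/* Usage sketch for gc_quicksort(): a core2heavypages row stores a record
 * count in slot 0 followed by (policy-table slot, totalfreq,
 * remoteaccess) triples, so field k of 1-based record r lives at
 * array[3*r - offset], with offset 0 selecting remoteaccess and offset 1
 * selecting totalfreq. The sort is descending on the selected field. */
static void sort_heavypages_sketch(unsigned long long * row) {
  int nrecords = (int)row[0];
  if(nrecords > 0) {
    gc_quicksort(row, 1, nrecords, 0);  // descending by remote accesses
  }
}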
2655 // Cache every page on the core that accesses it the most.
2656 // Check whether any core's pages total more accesses than a
2657 // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD share of the total. If so, take the
2658 // pages with the most remote accesses and hash them for home until the
2659 // core gets back below that share.
2660 int cacheAdapt_policy_overload(){
2661 unsigned int page_index = 0;
2663 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2665 int * tmp_p = gccachepolicytbl+1;
2666 unsigned long long workload[NUMCORESACTIVE];
2667 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2668 unsigned long long total_workload = 0;
2669 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2670 memset(core2heavypages,0,
2671 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2672 for(page_index = 0; page_index < page_num; page_index++) {
2673 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2674 bamboo_cache_policy_t policy = {0};
2676 unsigned long long totalfreq = 0;
2677 unsigned int hotfreq = 0;
2679 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2680 for(int i = 0; i < NUMCORESACTIVE; i++) {
2681 unsigned int freq = *local_tbl;
2682 local_tbl=(unsigned int *)(
2683 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2685 // check the frequency and decide if this page is hot for the core
2686 if(hotfreq < freq) {
2691 // Decide the cache strategy for this page.
2692 // If a new cache strategy is adopted, write it into the shared block of
2693 // the gcsharedsamplingtbl; the mem recording the written information is
2694 // large enough to hold it.
2695 // Format: page start va + cache strategy (h4h / host core (x,y))
2697 // this page has not been accessed, do not change its cache policy
2701 totalfreq/=BAMBOO_PAGE_SIZE;
2702 hotfreq/=BAMBOO_PAGE_SIZE;
2703 // cache the page locally on the hottest core
2704 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2705 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2706 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2707 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2708 *tmp_p = page_index;
2710 *tmp_p = policy.word;
2713 workload[hotestcore] += totalfreq;
2714 total_workload += totalfreq;
2715 // append to core2heavypages (sorted later with gc_quicksort)
2716 unsigned long long remoteaccess = totalfreq - hotfreq;
2717 int index = (int)core2heavypages[hotestcore][0];
2718 core2heavypages[hotestcore][3*index+3] = remoteaccess;
2719 core2heavypages[hotestcore][3*index+2] = totalfreq;
2720 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
2721 core2heavypages[hotestcore][0]++;
2724 unsigned long long workload_threshold =
2725 total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
2726 // Check the workload of each core
2727 for(int i = 0; i < NUMCORESACTIVE; i++) {
2729 int index = (int)core2heavypages[i][0];
2730 if(workload[i] > workload_threshold) {
2731 // sort according to the remoteaccess
2732 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
2733 while((workload[i] > workload_threshold) && (j<index*3)) {
2734 // hash-for-home the pages with the most remote accesses
2735 bamboo_cache_policy_t policy = {0};
2736 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2737 *((int*)core2heavypages[i][j]) = policy.word;
2738 workload[i] -= core2heavypages[i][j+1];
2745 } // int cacheAdapt_policy_overload()
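/* Illustrative sketch of the per-core bookkeeping above: workload[c]
 * accumulates the page-scaled accesses of every page homed on core c,
 * and the core's core2heavypages row gains one record per such page.
 * A core counts as overloaded once its share exceeds
 * total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD (10% of all traffic
 * for the threshold of 10); its most remotely accessed pages are then
 * flipped to hash-for-home until it drops back under. */
static void record_heavy_page_sketch(unsigned long long * row,
                                     unsigned long long tbl_slot,
                                     unsigned long long totalfreq,
                                     unsigned long long remoteaccess) {
  int index = (int)row[0];        // number of records so far
  row[3*index+1] = tbl_slot;      // address of the page's policy entry
  row[3*index+2] = totalfreq;
  row[3*index+3] = remoteaccess;
  row[0]++;
}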
2747 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
2748 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
2749 // Cache every page on the core that accesses it the most.
2750 // Check whether any core's pages total more accesses than a
2751 // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD share of the total. If so, take the
2752 // pages with the most remote accesses and hash them for home until the
2753 // core gets back below that share.
2754 // Then sort the pages based on activity:
2755 // if more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses to a
2756 // core's pages come from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
2757 // start hash-for-homing these pages (selecting the ones with the most
2758 // remote accesses, i.e. the fewest local accesses, first) until we get
2759 // below GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
2760 int cacheAdapt_policy_crowd(){
2761 unsigned int page_index = 0;
2763 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2765 int * tmp_p = gccachepolicytbl+1;
2766 unsigned long long workload[NUMCORESACTIVE];
2767 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2768 unsigned long long total_workload = 0;
2769 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2770 memset(core2heavypages,0,
2771 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2772 for(page_index = 0; page_index < page_num; page_index++) {
2773 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2774 bamboo_cache_policy_t policy = {0};
2776 unsigned long long totalfreq = 0;
2777 unsigned int hotfreq = 0;
2779 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2780 for(int i = 0; i < NUMCORESACTIVE; i++) {
2781 unsigned int freq = *local_tbl;
2782 local_tbl=(unsigned int *)(
2783 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2785 // check the frequency and decide if this page is hot for the core
2786 if(hotfreq < freq) {
2791 // Decide the cache strategy for this page.
2792 // If a new cache strategy is adopted, write it into the shared block of
2793 // the gcsharedsamplingtbl; the mem recording the written information is
2794 // large enough to hold it.
2795 // Format: page start va + cache strategy (h4h / host core (x,y))
2797 // this page has not been accessed, do not change its cache policy
2800 totalfreq/=BAMBOO_PAGE_SIZE;
2801 hotfreq/=BAMBOO_PAGE_SIZE;
2802 // cache the page locally on the hottest core
2803 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2804 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2805 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2806 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2807 *tmp_p = page_index;
2809 *tmp_p = policy.word;
2812 workload[hotestcore] += totalfreq;
2813 total_workload += totalfreq;
2814 // append to core2heavypages (sorted later with gc_quicksort)
2815 unsigned long long remoteaccess = totalfreq - hotfreq;
2816 int index = (int)core2heavypages[hotestcore][0];
2817 core2heavypages[hotestcore][3*index+3] = remoteaccess;
2818 core2heavypages[hotestcore][3*index+2] = totalfreq;
2819 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
2820 core2heavypages[hotestcore][0]++;
2823 unsigned long long workload_threshold =
2824 total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
2825 // Check the workload of each core
2826 for(int i = 0; i < NUMCORESACTIVE; i++) {
2828 int index = (int)core2heavypages[i][0];
2829 if(workload[i] > workload_threshold) {
2830 // sort according to the remoteaccess
2831 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
2832 while((workload[i] > workload_threshold) && (j<index*3)) {
2833 // hash-for-home the pages with the most remote accesses
2834 bamboo_cache_policy_t policy = {0};
2835 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2836 *((int*)core2heavypages[i][j]) = policy.word;
2837 workload[i] -= core2heavypages[i][j+1];
2842 // Check if the accesses are crowded onto a few pages:
2843 // sort according to total accesses
2845 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
2846 unsigned long long threshold =
2847 GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
2848 int num_crowded = 0;
2849 unsigned long long t_workload = 0;
2851 t_workload += core2heavypages[i][j+num_crowded*3+1];
2853 } while(t_workload < threshold);
2854 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
2855 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
2856 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
2857 // need to hash-for-home some of these pages;
2858 // sort them according to remote accesses
2859 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
2860 // hash-for-home the pages with the most remote accesses
2861 bamboo_cache_policy_t policy = {0};
2862 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2863 *((int*)core2heavypages[i][j]) = policy.word;
2864 workload[i] -= core2heavypages[i][j+1];
2865 t_workload -= core2heavypages[i][j+1];
2867 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
2873 } // int cacheAdapt_policy_crowd()
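/* Worked example for the crowd check above (numbers invented): with
 * GC_CACHE_ADAPT_ACCESS_THRESHOLD = 70 and workload[i] = 1000, pages are
 * taken in decreasing total-access order until t_workload reaches the
 * threshold of 700. If that takes more than
 * GC_CACHE_ADAPT_CROWD_THRESHOLD = 20 pages, the pages with the most
 * remote accesses are hash-for-homed, workload[i] and t_workload shrink
 * accordingly, and the threshold is recomputed against the reduced
 * workload until the hot set fits within 20 pages. */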
2875 void cacheAdapt_master() {
2876 #ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
2877 gc_output_cache_sampling_r();
2878 #endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
2880 // check the collected statistics
2881 // for each page, decide the new cache strategy
2882 #ifdef GC_CACHE_ADAPT_POLICY1
2883 numchanged = cacheAdapt_policy_h4h();
2884 #elif defined GC_CACHE_ADAPT_POLICY2
2885 numchanged = cacheAdapt_policy_local();
2886 #elif defined GC_CACHE_ADAPT_POLICY3
2887 numchanged = cacheAdapt_policy_hotest();
2888 #elif defined GC_CACHE_ADAPT_POLICY4
2889 numchanged = cacheAdapt_policy_dominate();
2890 #elif defined GC_CACHE_ADAPT_POLICY5
2891 numchanged = cacheAdapt_policy_overload();
2892 #elif defined GC_CACHE_ADAPT_POLICY6
2893 numchanged = cacheAdapt_policy_crowd();
2895 *gccachepolicytbl = numchanged;
2898 // adapt the cache strategy for the mutator
2899 void cacheAdapt_mutator() {
2900 int numchanged = *gccachepolicytbl;
2901 // read the changed entries and apply them
2902 int * tmp_p = gccachepolicytbl+1;
2903 while(numchanged--) {
2904 // read out the policy
2905 int page_index = *tmp_p;
2906 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
2908 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
2909 policy, BAMBOO_PAGE_SIZE);
2915 void gc_output_cache_sampling() {
2916 unsigned int page_index = 0;
2918 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2919 for(page_index = 0; page_index < page_num; page_index++) {
2920 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2922 BLOCKINDEX(page_sva, &block);
2923 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2924 tprintf("va: %x page_index: %d host: %d\n",
2925 (int)page_sva, page_index, coren);
2926 for(int i = 0; i < NUMCORESACTIVE; i++) {
2927 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl
2928 +size_cachesamplingtbl_local*i);
2929 unsigned int freq = local_tbl[page_index];
2930 printf("%8d ",freq);
2934 printf("=================\n");
2935 } // gc_output_cache_sampling
2937 void gc_output_cache_sampling_r() {
2938 unsigned int page_index = 0;
2940 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2941 for(page_index = 0; page_index < page_num; page_index++) {
2942 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2944 BLOCKINDEX(page_sva, &block);
2945 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2946 tprintf("va: %x page_index: %d host: %d\n",
2947 (int)page_sva, page_index, coren);
2948 for(int i = 0; i < NUMCORESACTIVE; i++) {
2949 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl_r
2950 +size_cachesamplingtbl_local_r*i);
2951 unsigned int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
2952 printf("%8d ",freq);
2956 printf("=================\n");
2957 } // gc_output_cache_sampling_r
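/* Illustrative sketch of the sampling-table addressing used by the two
 * output routines above: gccachesamplingtbl(_r) is one contiguous
 * region holding a per-core sub-table of page counters, so core i's
 * counter for a page sits size_cachesamplingtbl_local(_r) bytes per
 * core into the region. */
static unsigned int sample_of_sketch(const void * tbl, int core,
                                     unsigned int page_index,
                                     unsigned int bytes_per_core) {
  const unsigned int * local_tbl =
    (const unsigned int *)((const char *)tbl + bytes_per_core * core);
  return local_tbl[page_index];
}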
2958 #endif // GC_CACHE_ADAPT
2960 inline void gc_collect(struct garbagelist * stackptr) {
2961 // inform the master that this core is at a gc safe point and is ready to
2962 // do gc
2963 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
2964 self_numreceiveobjs, false);
2966 // core collector routine
2968 if(INITPHASE == gcphase) {
2972 #ifdef RAWPATH // TODO GC_DEBUG
2973 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
2976 #ifdef GC_CACHE_ADAPT
2977 // prepare for cache adaptation:
2978 cacheAdapt_gc(true);
2979 #endif // GC_CACHE_ADAPT
2980 //send init finish msg to core coordinator
2981 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
2984 if(MARKPHASE == gcphase) {
2988 #ifdef RAWPATH // TODO GC_DEBUG
2989 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
2990 udn_tile_coord_y());
2992 mark(true, stackptr);
2993 #ifdef RAWPATH // TODO GC_DEBUG
2994 printf("(%x,%x) Finish mark phase, start compact phase\n",
2995 udn_tile_coord_x(), udn_tile_coord_y());
2998 #ifdef RAWPATH // TODO GC_DEBUG
2999 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3000 udn_tile_coord_y());
3004 if(MAPPHASE == gcphase) {
3008 #ifdef RAWPATH // TODO GC_DEBUG
3009 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3010 udn_tile_coord_y());
3013 #ifdef RAWPATH // TODO GC_DEBUG
3014 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3015 udn_tile_coord_y());
3019 if(FLUSHPHASE == gcphase) {
3023 #ifdef RAWPATH // TODO GC_DEBUG
3024 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3025 udn_tile_coord_y());
3028 // send the num of obj/liveobj/forwardobj to the startupcore
3029 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3030 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3031 gc_num_liveobj, gc_num_forwardobj, false);
3034 #endif // GC_PROFILE
3036 #ifdef RAWPATH // TODO GC_DEBUG
3037 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3038 udn_tile_coord_y());
3041 #ifdef GC_CACHE_ADAPT
3043 if(PREFINISHPHASE == gcphase) {
3047 #ifdef RAWPATH // TODO GC_DEBUG
3048 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3049 udn_tile_coord_y());
3051 // cache adapt phase
3052 cacheAdapt_mutator();
3053 cacheAdapt_gc(false);
3054 //send prefinish finish msg to core coordinator
3055 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3056 #ifdef RAWPATH // TODO GC_DEBUG
3057 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3058 udn_tile_coord_y());
3060 #ifdef GC_CACHE_SAMPLING
3061 // reset the sampling arrays
3062 bamboo_dtlb_sampling_reset();
3063 #endif // GC_CACHE_SAMPLING
3064 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3065 // zero out the gccachesamplingtbl
3066 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3067 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3068 size_cachesamplingtbl_local_r);
3070 #endif // GC_CACHE_ADAPT
3072 // invalidate all shared mem pointers
3073 bamboo_cur_msp = NULL;
3074 bamboo_smem_size = 0;
3075 bamboo_smem_zero_top = NULL;
3078 if(FINISHPHASE == gcphase) {
3084 gcprocessing = false;
3085 #ifdef RAWPATH // TODO GC_DEBUG
3086 printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3088 } // void gc_collect(struct garbagelist * stackptr)
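/* Summary of the phase order a GC core walks through in gc_collect()
 * above, driven by gcphase updates from the master core:
 *   INITPHASE -> MARKPHASE -> COMPACTPHASE -> MAPPHASE -> FLUSHPHASE
 *   (-> PREFINISHPHASE when GC_CACHE_ADAPT is on) -> FINISHPHASE
 * Each phase ends with a GCFINISH* message back to STARTUPCORE, which
 * advances gcphase once every core has reported in. */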
3090 inline void gc_nocollect(struct garbagelist * stackptr) {
3091 // inform the master that this core is at a gc safe point and is ready to
3092 // do gc
3093 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3094 self_numreceiveobjs, false);
3097 if(INITPHASE == gcphase) {
3101 #ifdef RAWPATH // TODO GC_DEBUG
3102 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3105 #ifdef GC_CACHE_ADAPT
3106 // prepare for cache adaptation:
3107 cacheAdapt_gc(true);
3108 #endif // GC_CACHE_ADAPT
3109 //send init finish msg to core coordinator
3110 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3113 if(MARKPHASE == gcphase) {
3117 #ifdef RAWPATH // TODO GC_DEBUG
3118 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3119 udn_tile_coord_y());
3121 mark(true, stackptr);
3122 #ifdef RAWPATH // TODO GC_DEBUG
3123 printf("(%x,%x) Finish mark phase, wait for flush\n",
3124 udn_tile_coord_x(), udn_tile_coord_y());
3127 // non-gc core collector routine
3129 if(FLUSHPHASE == gcphase) {
3133 #ifdef RAWPATH // TODO GC_DEBUG
3134 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3135 udn_tile_coord_y());
3138 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3139 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3140 gc_num_liveobj, gc_num_forwardobj, false);
3143 #endif // GC_PROFILE
3145 #ifdef RAWPATH // TODO GC_DEBUG
3146 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3147 udn_tile_coord_y());
3150 #ifdef GC_CACHE_ADAPT
3152 if(PREFINISHPHASE == gcphase) {
3156 #ifdef RAWPATH // TODO GC_DEBUG
3157 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3158 udn_tile_coord_y());
3160 // cache adapt phase
3161 cacheAdapt_mutator();
3162 cacheAdapt_gc(false);
3163 //send prefinish finish msg to core coordinator
3164 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3165 #ifdef RAWPATH // TODO GC_DEBUG
3166 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3167 udn_tile_coord_y());
3169 #ifdef GC_CACHE_SAMPLING
3170 // reset the sampling arrays
3171 bamboo_dtlb_sampling_reset();
3172 #endif // GC_CACHE_SAMPLING
3173 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3174 // zero out the gccachesamplingtbl
3175 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3176 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3177 size_cachesamplingtbl_local_r);
3179 #endif // GC_CACHE_ADAPT
3181 // invalidate all shared mem pointers
3182 bamboo_cur_msp = NULL;
3183 bamboo_smem_size = 0;
3184 bamboo_smem_zero_top = NULL;
3187 if(FINISHPHASE == gcphase) {
3192 gcprocessing = false;
3193 #ifdef RAWPATH // TODO GC_DEBUG
3194 printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3196 } // void gc_nocollect(struct garbagelist * stackptr)
3198 inline void gc_master(struct garbagelist * stackptr) {
3200 gcphase = INITPHASE;
3202 waitconfirm = false;
3206 // Note: all cores need to init gc including non-gc cores
3207 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3208 // send GC init messages to all cores
3209 send_msg_1(i, GCSTARTINIT, false);
3211 bool isfirst = true;
3212 bool allStall = false;
3214 #ifdef GC_CACHE_ADAPT
3215 // prepare for cache adaptation:
3216 cacheAdapt_gc(true);
3217 #endif // GC_CACHE_ADAPT
3219 #ifdef RAWPATH // TODO GC_DEBUG
3220 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3221 udn_tile_coord_y());
3224 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3226 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3227 if(gc_checkAllCoreStatus_I()) {
3228 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3231 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3236 #ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
3237 gc_output_cache_sampling();
3238 #endif // GC_CACHE_ADAPT
3239 #ifdef RAWPATH // TODO GC_DEBUG
3240 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3241 udn_tile_coord_y());
3243 // all cores have finished the init phase;
3244 // restore the gcstatus of all cores
3245 // Note: all cores have to do mark including non-gc cores
3246 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3247 for(i = 1; i < NUMCORESACTIVE; ++i) {
3248 gccorestatus[i] = 1;
3249 // send GC start messages to all cores
3250 send_msg_1(i, GCSTART, false);
3253 gcphase = MARKPHASE;
3255 while(MARKPHASE == gcphase) {
3256 mark(isfirst, stackptr);
3263 } // while(MARKPHASE == gcphase)
3264 // send msgs to all cores requesting large objs info
3265 // Note: only need to ask gc cores, non-gc cores do not host any objs
3266 numconfirm = NUMCORES4GC - 1;
3267 for(i = 1; i < NUMCORES4GC; ++i) {
3268 send_msg_1(i, GCLOBJREQUEST, false);
3270 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3275 } // wait for responses
3276 // check the heaptop
3277 if(gcheaptop < gcmarkedptrbound) {
3278 gcheaptop = gcmarkedptrbound;
3283 #ifdef RAWPATH // TODO GC_DEBUG
3284 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3285 udn_tile_coord_y());
3287 // cache all large objs
3289 // not enough space to cache large objs
3290 BAMBOO_EXIT(0xb006);
3292 // predict number of blocks to fill for each core
3294 int numpbc = loadbalance(&tmpheaptop);
3296 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3297 #ifdef RAWPATH // TODO GC_DEBUG
3298 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3299 udn_tile_coord_y());
3301 //int tmptopptr = 0;
3302 //BASEPTR(gctopcore, 0, &tmptopptr);
3304 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3305 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3306 GC_BAMBOO_DEBUGPRINT(0xabab);
3307 GC_BAMBOO_DEBUGPRINT_REG(tmptopptr);
3308 for(i = 0; i < NUMCORES4GC; ++i) {
3310 BASEPTR(i, numpbc, &tmpcoreptr);
3311 //send start compact messages to all cores
3312 //TODO bug here: do not know if the direction is positive or negative
3313 if (tmpcoreptr < tmpheaptop) {
3314 gcstopblock[i] = numpbc + 1;
3315 if(i != STARTUPCORE) {
3316 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3318 gcblock2fill = numpbc+1;
3319 } // if(i != STARTUPCORE)
3321 gcstopblock[i] = numpbc;
3322 if(i != STARTUPCORE) {
3323 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3325 gcblock2fill = numpbc;
3326 } // if(i != STARTUPCORE)
3328 GC_BAMBOO_DEBUGPRINT(0xf000+i);
3329 GC_BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3330 GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3331 // init some data structures for compact phase
3333 gcfilledblocks[i] = 0;
3334 gcrequiredmems[i] = 0;
3344 bool finalcompact = false;
3345 // initialize pointers for compacting
3346 struct moveHelper * orig =
3347 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3348 struct moveHelper * to =
3349 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3350 initOrig_Dst(orig, to);
3351 int filledblocks = 0;
3352 INTPTR heaptopptr = 0;
3353 bool finishcompact = false;
3354 bool iscontinue = true;
3355 bool localcompact = true;
3356 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3357 if((!finishcompact) && iscontinue) {
3358 GC_BAMBOO_DEBUGPRINT(0xe001);
3359 GC_BAMBOO_DEBUGPRINT_REG(numpbc);
3360 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3361 finishcompact = compacthelper(orig, to, &filledblocks,
3362 &heaptopptr, &localcompact);
3363 GC_BAMBOO_DEBUGPRINT(0xe002);
3364 GC_BAMBOO_DEBUGPRINT_REG(finishcompact);
3365 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3366 GC_BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3367 GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3368 GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3371 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3372 if(gc_checkCoreStatus_I()) {
3373 // all cores have finished compacting
3374 // restore the gcstatus of all cores
3375 for(i = 0; i < NUMCORES4GC; ++i) {
3376 gccorestatus[i] = 1;
3378 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3381 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3382 // check if there is spare mem for pending move requests
3383 if(COMPACTPHASE == gcphase) {
3384 GC_BAMBOO_DEBUGPRINT(0xe003);
3385 resolvePendingMoveRequest();
3386 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3388 GC_BAMBOO_DEBUGPRINT(0xe004);
3391 } // if(gc_checkCoreStatus_I()) else ...
3394 GC_BAMBOO_DEBUGPRINT(0xe005);
3395 GC_BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3396 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3397 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3398 to->ptr = gcmovestartaddr;
3399 to->numblocks = gcblock2fill - 1;
3400 to->bound = (to->numblocks==0) ?
3401 BAMBOO_SMEM_SIZE_L :
3402 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3403 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3404 to->offset = to->ptr - to->base;
3405 to->top = (to->numblocks==0) ?
3406 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3408 to->offset = BAMBOO_CACHE_LINE_SIZE;
3409 to->ptr += to->offset; // for header
3410 to->top += to->offset;
3411 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3412 localcompact = true;
3414 localcompact = false;
3418 } else if(!finishcompact) {
3422 } // while(COMPACTPHASE == gcphase)
3426 #ifdef RAWPATH // TODO GC_DEBUG
3427 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3428 udn_tile_coord_y());
3432 #ifdef RAWPATH // TODO GC_DEBUG
3433 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3434 udn_tile_coord_y());
3441 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3442 // Note: only gc cores host shared objs, so only they send mapping info
3444 for(i = 1; i < NUMCORES4GC; ++i) {
3445 // send start map messages to all gc cores
3446 gccorestatus[i] = 1;
3447 send_msg_1(i, GCSTARTMAPINFO, false);
3452 #ifdef RAWPATH // TODO GC_DEBUG
3453 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3454 udn_tile_coord_y());
3458 #ifdef RAWPATH // TODO GC_DEBUG
3459 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3460 udn_tile_coord_y());
3462 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3463 while(MAPPHASE == gcphase) {
3464 // check the status of all cores
3465 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3466 if(gc_checkCoreStatus_I()) {
3467 // all cores have finished sending mapping info
3468 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3471 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3472 } // while(MAPPHASE == gcphase)
3474 gcphase = FLUSHPHASE;
3475 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3476 // Note: all cores should flush their runtime data, including non-gc
3477 // cores
3478 for(i = 1; i < NUMCORESACTIVE; ++i) {
3479 // send start flush messages to all cores
3480 gccorestatus[i] = 1;
3481 send_msg_1(i, GCSTARTFLUSH, false);
3486 #ifdef RAWPATH // TODO GC_DEBUG
3487 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3488 udn_tile_coord_y());
3493 #ifdef GC_CACHE_ADAPT
3494 // now the master core needs to decide the new cache strategy
3495 cacheAdapt_master();
3496 #endif // GC_CACHE_ADAPT
3498 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3499 while(FLUSHPHASE == gcphase) {
3500 // check the status of all cores
3501 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3502 if(gc_checkAllCoreStatus_I()) {
3503 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3506 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3507 } // while(FLUSHPHASE == gcphase)
3508 #ifdef RAWPATH // TODO GC_DEBUG
3509 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3510 udn_tile_coord_y());
3513 #ifdef GC_CACHE_ADAPT
3517 gcphase = PREFINISHPHASE;
3518 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3519 // Note: all cores should do the prefinish work, including non-gc
3520 // cores
3521 for(i = 1; i < NUMCORESACTIVE; ++i) {
3522 // send start prefinish messages to all cores
3523 gccorestatus[i] = 1;
3524 send_msg_1(i, GCSTARTPREF, false);
3526 #ifdef RAWPATH // TODO GC_DEBUG
3527 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3528 udn_tile_coord_y());
3530 // cache adapt phase
3531 cacheAdapt_mutator();
3532 #ifdef GC_CACHE_ADAPT_OUTPUT
3533 bamboo_output_cache_policy();
3535 cacheAdapt_gc(false);
3537 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3538 while(PREFINISHPHASE == gcphase) {
3539 // check the status of all cores
3540 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3541 if(gc_checkAllCoreStatus_I()) {
3542 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3545 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3546 } // while(PREFINISHPHASE == gcphase)
3548 #ifdef GC_CACHE_SAMPLING
3549 // reset the sampling arrays
3550 bamboo_dtlb_sampling_reset();
3551 #endif // GC_CACHE_SAMPLING
3552 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3553 // zero out the gccachesamplingtbl
3554 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3555 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3556 size_cachesamplingtbl_local_r);
3557 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
3559 #endif // GC_CACHE_ADAPT
3561 gcphase = FINISHPHASE;
3563 // invalidate all shared mem pointers
3564 // put it here since it takes time to inform all the other cores to
3565 // finish gc, and problems might arise if some core resumes the
3566 // mutator earlier than the others
3567 bamboo_cur_msp = NULL;
3568 bamboo_smem_size = 0;
3569 bamboo_smem_zero_top = NULL;
3574 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3575 for(i = 1; i < NUMCORESACTIVE; ++i) {
3576 // send gc finish messages to all cores
3577 send_msg_1(i, GCFINISH, false);
3578 gccorestatus[i] = 1;
3582 gcprocessing = false;
3583 #ifdef RAWPATH // TODO GC_DEBUG
3584 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
3585 udn_tile_coord_y());
3587 } // void gc_master(struct garbagelist * stackptr)
3589 inline bool gc(struct garbagelist * stackptr) {
3592 gcprocessing = false;
3596 #ifdef GC_CACHE_ADAPT
3597 #ifdef GC_CACHE_SAMPLING
3598 // disable the timer interrupt
3599 bamboo_mask_timer_intr();
3602 // core coordinator routine
3603 if(0 == BAMBOO_NUM_OF_CORE) {
3605 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
3606 udn_tile_coord_y());
3608 bool isallstall = true;
3609 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3610 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3612 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
3613 if(gccorestatus[ti] != 0) {
3619 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3620 // some cores are still executing the mutator and have not reached
3621 // a gc safe point, so gc cannot start yet;
3622 // in case some pregc information msgs were lost, send a confirm
3623 // msg to the 'busy' core
3624 send_msg_1(ti, GCSTARTPRE, false);
3632 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
3633 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
3635 GC_BAMBOO_DEBUGPRINT(0xec04);
3636 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3637 sumsendobj += gcnumsendobjs[0][i];
3638 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
3639 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3640 GC_BAMBOO_DEBUGPRINT(0xec05);
3641 GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
3642 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3643 sumsendobj -= gcnumreceiveobjs[0][i];
3644 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
3645 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3646 GC_BAMBOO_DEBUGPRINT(0xec06);
3647 GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
3648 if(0 != sumsendobj) {
3649 // there are still some msgs in flight; wait until updated
3650 // pregc information arrives and then check again
3652 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3660 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3663 #ifdef RAWPATH // TODO GC_DEBUG
3664 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3666 // Zero out the remaining bamboo_cur_msp.
3667 // Only zero out the first 4 bytes of the remaining memory.
3668 // The operation is moved here because for the GC_CACHE_ADAPT version
3669 // we need to make sure the shared heap is not touched during the
3670 // gcinit phase; otherwise there would be problems when adapting the
3671 // cache strategy.
3672 if((bamboo_cur_msp != 0)
3673 && (bamboo_smem_zero_top == bamboo_cur_msp)
3674 && (bamboo_smem_size > 0)) {
3675 *((int *)bamboo_cur_msp) = 0;
3677 #ifdef GC_FLUSH_DTLB
3678 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3679 BAMBOO_CLEAN_DTLB();
3680 gc_num_flush_dtlb++;
3683 #ifdef GC_CACHE_ADAPT
3684 #ifdef GC_CACHE_SAMPLING
3685 // get the sampling data
3686 bamboo_output_dtlb_sampling();
3687 #endif // GC_CACHE_SAMPLING
3688 #endif // GC_CACHE_ADAPT
3689 gcprocessing = true;
3690 gc_master(stackptr);
3691 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
3692 // Zero out the remaining bamboo_cur_msp.
3693 // Only zero out the first 4 bytes of the remaining memory.
3694 // The operation is moved here because for the GC_CACHE_ADAPT version
3695 // we need to make sure the shared heap is not touched during the
3696 // gcinit phase; otherwise there would be problems when adapting the
3697 // cache strategy.
3698 if((bamboo_cur_msp != 0)
3699 && (bamboo_smem_zero_top == bamboo_cur_msp)
3700 && (bamboo_smem_size > 0)) {
3701 *((int *)bamboo_cur_msp) = 0;
3703 #ifdef GC_FLUSH_DTLB
3704 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3705 BAMBOO_CLEAN_DTLB();
3706 gc_num_flush_dtlb++;
3709 #ifdef GC_CACHE_ADAPT
3710 #ifdef GC_CACHE_SAMPLING
3711 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3712 // get the sampling data
3713 bamboo_output_dtlb_sampling();
3715 #endif // GC_CACHE_SAMPLING
3716 #endif // GC_CACHE_ADAPT
3717 gcprocessing = true;
3718 gc_collect(stackptr);
3720 // Zero out the remaining bamboo_cur_msp.
3721 // Only zero out the first 4 bytes of the remaining memory.
3722 // The operation is moved here because for the GC_CACHE_ADAPT version
3723 // we need to make sure the shared heap is not touched during the
3724 // gcinit phase; otherwise there would be problems when adapting the
3725 // cache strategy.
3726 if((bamboo_cur_msp != 0)
3727 && (bamboo_smem_zero_top == bamboo_cur_msp)
3728 && (bamboo_smem_size > 0)) {
3729 *((int *)bamboo_cur_msp) = 0;
3731 #ifdef GC_FLUSH_DTLB
3732 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3733 BAMBOO_CLEAN_DTLB();
3734 gc_num_flush_dtlb++;
3737 #ifdef GC_CACHE_ADAPT
3738 #ifdef GC_CACHE_SAMPLING
3739 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3740 // get the sampling data
3741 bamboo_output_dtlb_sampling();
3743 #endif // GC_CACHE_SAMPLING
3744 #endif // GC_CACHE_ADAPT
3745 // not a gc core, should wait for gcfinish msg
3746 gcprocessing = true;
3747 gc_nocollect(stackptr);
3749 #ifdef GC_CACHE_ADAPT
3750 #ifdef GC_CACHE_SAMPLING
3751 // enable the timer interrupt
3752 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
3753 bamboo_unmask_timer_intr();
3754 #endif // GC_CACHE_SAMPLING
3755 #endif // GC_CACHE_ADAPT
3758 } // bool gc(struct garbagelist * stackptr)
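/* Illustrative sketch of the pre-gc quiescence test in gc() above: the
 * master may only start collecting when every core sits at a gc safe
 * point and no transferred objects are still in flight, i.e. the global
 * number of sends equals the global number of receives. */
static bool msgs_in_flight_sketch(const int * numsendobjs,
                                  const int * numreceiveobjs, int ncores) {
  int sumsendobj = 0;
  for(int i = 0; i < ncores; i++) {
    sumsendobj += numsendobjs[i] - numreceiveobjs[i];
  }
  return sumsendobj != 0;  // nonzero: wait for more pregc status msgs
}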
3761 inline void gc_profileStart(void) {
3762 if(!gc_infoOverflow) {
3763 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
3764 gc_infoArray[gc_infoIndex] = gcInfo;
3766 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
3770 inline void gc_profileItem(void) {
3771 if(!gc_infoOverflow) {
3772 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
3773 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
3777 inline void gc_profileEnd(void) {
3778 if(!gc_infoOverflow) {
3779 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
3780 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
3781 gcInfo->time[gcInfo->index++] = gc_num_livespace;
3782 gcInfo->time[gcInfo->index++] = gc_num_freespace;
3783 gcInfo->time[gcInfo->index++] = gc_num_lobj;
3784 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
3785 gcInfo->time[gcInfo->index++] = gc_num_obj;
3786 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
3787 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
3789 if(gc_infoIndex == GCINFOLENGTH) {
3790 gc_infoOverflow = true;
3791 //taskInfoIndex = 0;
3796 // output the profiling data
3797 void gc_outputProfileData() {
3800 unsigned long long totalgc = 0;
3802 #ifndef BAMBOO_MEMPROF
3803 BAMBOO_PRINT(0xdddd);
3805 // output gc related info
3806 for(i= 0; i < gc_infoIndex; i++) {
3807 GCInfo * gcInfo = gc_infoArray[i];
3808 #ifdef BAMBOO_MEMPROF
3809 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
3811 unsigned long long tmp = 0;
3812 BAMBOO_PRINT(0xddda);
3813 for(j = 0; j < gcInfo->index - 7; j++) {
3814 BAMBOO_PRINT(gcInfo->time[j]);
3815 BAMBOO_PRINT(gcInfo->time[j]-tmp);
3816 BAMBOO_PRINT(0xdddb);
3817 tmp = gcInfo->time[j];
3819 tmp = (tmp-gcInfo->time[0]);
3820 BAMBOO_PRINT_REG(tmp);
3821 BAMBOO_PRINT(0xdddc);
3822 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
3823 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
3824 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
3825 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
3826 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
3827 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
3828 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
3829 BAMBOO_PRINT(0xddde);
3833 #ifndef BAMBOO_MEMPROF
3834 BAMBOO_PRINT(0xdddf);
3836 BAMBOO_PRINT_REG(totalgc);
3838 if(gc_infoOverflow) {
3839 BAMBOO_PRINT(0xefee);
3842 #ifndef BAMBOO_MEMPROF
3843 BAMBOO_PRINT(0xeeee);
3846 #endif // #ifdef GC_PROFILE