#include "multicoregarbage.h"
#include "multicoreruntime.h"
#include "runtime_arch.h"
#include "SimpleHash.h"
#include "GenericHashtable.h"
#include "ObjectHash.h"
#include "GCSharedHash.h"

extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
extern int numqueues[][NUMCLASSES];

extern struct genhashtable * activetasks;
extern struct taskparamdescriptor *currtpd;

extern struct LockValue runtime_locks[MAXTASKPARAMS];
extern int runtime_locklen;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
struct pointerblock {
  void * ptrs[NUMPTRS];
  struct pointerblock *next;
};

struct pointerblock *gchead=NULL;
int gcheadindex=0;
struct pointerblock *gctail=NULL;
int gctailindex=0;
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
struct pointerblock *gcspare=NULL;
#define NUMLOBJPTRS 20

struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];
  int lengths[NUMLOBJPTRS];
  int hosts[NUMLOBJPTRS];
  struct lobjpointerblock *next;
  struct lobjpointerblock *prev;
};

struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;
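/* Illustrative sketch (not part of the original runtime): both queues are
 * chains of fixed-size blocks. The mark queue (gchead/gctail) is a
 * singly-linked FIFO: gc_enqueue_I() writes at gchead/gcheadindex and links
 * in a fresh pointerblock when the head block fills, while gc_dequeue_I()
 * reads at gctail/gctailindex and recycles drained blocks through gcspare to
 * avoid repeated RUNMALLOC_I/RUNFREE calls. The large-object queue is doubly
 * linked (next/prev) so it can also be walked backwards, which the sorted
 * large-object pass in cacheLObjs() relies on. A minimal usage sketch:
 */
#if 0
static void example_drain_markqueue_I() {
  // assumes interrupts are already disabled, as the _I suffix requires
  while(gc_moreItems_I()) {
    void * ptr = gc_dequeue_I();
    // ... process the shared obj pointed to by ptr ...
  }
}
#endif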
#ifdef GC_CACHE_ADAPT
typedef struct gc_cache_revise_info {
  int orig_page_start_va;
  int orig_page_end_va;
  int orig_page_index;
  int to_page_start_va;
  int to_page_end_va;
  int to_page_index;
  unsigned int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;
gc_cache_revise_info_t gc_cache_revise_infomation;
#endif // GC_CACHE_ADAPT
// dump the whole shared mem in blocks
inline void dumpSMem() {
  int block = 0;
  int sblock = 0;
  int j = 0;
  int i = 0;
  int coren = 0;
  int x = 0;
  int y = 0;
  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  // reserved blocks for sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  // remaining memory
  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
    advanceblock = false;
    // compute sblock #, block # and core coordinate (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      // finished a sblock
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          // finished a block
          block++;
          advanceblock = true;
        }
      } else {
        // finished a block
        block++;
        advanceblock = true;
      } // if(j < BAMBOO_LARGE_SMEM_BOUND/(4*16)) else ...
      if(advanceblock) {
        coren = gc_block2core[block%(NUMCORES4GC*2)];
      }
      // compute core coordinate
      BAMBOO_COORDS(coren, &x, &y);
      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
             udn_tile_coord_x(), udn_tile_coord_y(),
             block, sblock++, x, y,
             (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    } // if(j%(BAMBOO_SMEM_SIZE/(4*16)) == 0)
    j++;
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
    printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  } // for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16)
} // void dumpSMem()
// should be invoked with interrupts disabled
inline void gc_enqueue_I(void *ptr) {
  GC_BAMBOO_DEBUGPRINT(0xe601);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  if (gcheadindex==NUMPTRS) {
    struct pointerblock * tmp;
    if (gcspare!=NULL) {
      tmp=gcspare;
      gcspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
    } // if (gcspare!=NULL)
    gchead->next=tmp;
    gchead=tmp;
    gcheadindex=0;
  } // if (gcheadindex==NUMPTRS)
  gchead->ptrs[gcheadindex++]=ptr;
  GC_BAMBOO_DEBUGPRINT(0xe602);
} // void gc_enqueue_I(void *ptr)
// dequeue and destroy the queue
inline void * gc_dequeue_I() {
  if (gctailindex==NUMPTRS) {
    struct pointerblock *tmp=gctail;
    gctail=gctail->next;
    gctailindex=0;
    if (gcspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gcspare=tmp;
    } // if (gcspare!=NULL)
  } // if (gctailindex==NUMPTRS)
  return gctail->ptrs[gctailindex++];
} // void * gc_dequeue_I()

// dequeue and do not destroy the queue
inline void * gc_dequeue2_I() {
  if (gctailindex2==NUMPTRS) {
    struct pointerblock *tmp=gctail2;
    gctail2=gctail2->next;
    gctailindex2=0;
  } // if (gctailindex2==NUMPTRS)
  return gctail2->ptrs[gctailindex2++];
} // void * gc_dequeue2_I()

inline int gc_moreItems_I() {
  if ((gchead==gctail)&&(gctailindex==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems_I()

inline int gc_moreItems2_I() {
  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems2_I()
// should be invoked with interrupts disabled
// enqueue a large obj: start addr & length
inline void gc_lobjenqueue_I(void *ptr,
                             int length,
                             int host) {
  GC_BAMBOO_DEBUGPRINT(0xe901);
  if (gclobjheadindex==NUMLOBJPTRS) {
    struct lobjpointerblock * tmp;
    if (gclobjspare!=NULL) {
      tmp=gclobjspare;
      gclobjspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
    } // if (gclobjspare!=NULL)
    gclobjhead->next=tmp;
    tmp->prev = gclobjhead;
    gclobjhead=tmp;
    gclobjheadindex=0;
  } // if (gclobjheadindex==NUMLOBJPTRS)
  gclobjhead->lobjs[gclobjheadindex]=ptr;
  gclobjhead->lengths[gclobjheadindex]=length;
  gclobjhead->hosts[gclobjheadindex++]=host;
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
} // void gc_lobjenqueue_I(void *ptr...)
// dequeue and destroy the queue
inline void * gc_lobjdequeue_I(int * length,
                               int * host) {
  if (gclobjtailindex==NUMLOBJPTRS) {
    struct lobjpointerblock *tmp=gclobjtail;
    gclobjtail=gclobjtail->next;
    gclobjtailindex=0;
    gclobjtail->prev = NULL;
    if (gclobjspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gclobjspare=tmp;
      tmp->next = NULL;
      tmp->prev = NULL;
    } // if (gclobjspare!=NULL)
  } // if (gclobjtailindex==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail->lengths[gclobjtailindex];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
  }
  return gclobjtail->lobjs[gclobjtailindex++];
} // void * gc_lobjdequeue_I()
inline int gc_lobjmoreItems_I() {
  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems_I()

// dequeue and don't destroy the queue
inline void gc_lobjdequeue2_I() {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=1;
  } else {
    gclobjtailindex2++;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
} // void gc_lobjdequeue2_I()

inline int gc_lobjmoreItems2_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems2_I()

// 'reversely' dequeue and don't destroy the queue
inline void gc_lobjdequeue3_I() {
  if (gclobjtailindex2==0) {
    gclobjtail2=gclobjtail2->prev;
    gclobjtailindex2=NUMLOBJPTRS-1;
  } else {
    gclobjtailindex2--;
  } // if (gclobjtailindex2==0)
} // void gc_lobjdequeue3_I()

inline int gc_lobjmoreItems3_I() {
  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems3_I()

inline void gc_lobjqueueinit4_I() {
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
} // void gc_lobjqueueinit4_I()
inline void * gc_lobjdequeue4_I(int * length,
                                int * host) {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=0;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail2->lengths[gclobjtailindex2];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
  }
  return gclobjtail2->lobjs[gclobjtailindex2++];
} // void * gc_lobjdequeue4_I()

inline int gc_lobjmoreItems4_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems4_I()
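/* Illustrative sketch (not part of the original runtime): the *4 variants
 * implement a second, non-destructive cursor (gclobjtail2/gclobjtailindex2)
 * over the large-object queue; moveLObjs() uses them in this pattern:
 */
#if 0
static void example_walk_lobjs_I() {
  int len = 0;
  int host = 0;
  gc_lobjqueueinit4_I();       // reset the cursor to the queue tail
  while(gc_lobjmoreItems4_I()) {
    void * lobj = gc_lobjdequeue4_I(&len, &host);
    // ... inspect lobj/len/host without destroying the queue ...
  }
}
#endif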
INTPTR gccurr_heapbound = 0;

inline void gettype_size(void * ptr,
                         int * ttype,
                         int * tsize) {
  int type = ((int *)ptr)[0];
  int size = 0;
  if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)ptr;
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  } // if(type < NUMCLASSES)
  *ttype = type;
  *tsize = size;
} // void gettype_size(...)
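/* Illustrative example (assumed numbers): for a non-array object the size is
 * simply classsize[type]; for an array it is the header plus the elements,
 * e.g. with classsize[type] == 4 and ___length___ == 10,
 * size = sizeof(struct ArrayObject) + 10*4. */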
inline bool isLarge(void * ptr,
                    int * ttype,
                    int * tsize) {
  GC_BAMBOO_DEBUGPRINT(0xe701);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  // check if a pointer refers to a large object
  gettype_size(ptr, ttype, tsize);
  GC_BAMBOO_DEBUGPRINT(*tsize);
  int bound = (BAMBOO_SMEM_SIZE);
  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  if((((int)ptr-gcbaseva)%(bound))==0) {
    // ptr is the start of a block
    GC_BAMBOO_DEBUGPRINT(0xe702);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
    // it crosses the boundary of the current block
    GC_BAMBOO_DEBUGPRINT(0xe703);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  GC_BAMBOO_DEBUGPRINT(0);
  return false;
} // bool isLarge(void * ptr, int * ttype, int * tsize)
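/* Illustrative example (assumed numbers): an object counts as "large" if it
 * starts exactly on a block boundary or does not fit in the rest of its
 * block. E.g. with BAMBOO_SMEM_SIZE == 16K, an object at block offset 15K
 * with *tsize == 2K would cross the 16K boundary, so isLarge() returns true
 * and it goes through the large-object path instead of the block compactor. */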
inline int hostcore(void * ptr) {
  // check the host core of ptr
  int host = 0;
  RESIDECORE(ptr, &host);
  GC_BAMBOO_DEBUGPRINT(0xedd0);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  GC_BAMBOO_DEBUGPRINT_REG(host);
  return host;
} // int hostcore(void * ptr)

inline void cpu2coords(int coren,
                       int * x,
                       int * y) {
  *x = bamboo_cpu2coords[2*coren];
  *y = bamboo_cpu2coords[2*coren+1];
} // void cpu2coords(...)

inline bool isLocal(void * ptr) {
  // check if a pointer points into this core's part of the shared heap
  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
} // bool isLocal(void * ptr)
inline bool gc_checkCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORES4GC; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORES4GC; ++i)
  return allStall;
} // bool gc_checkCoreStatus_I()

inline bool gc_checkAllCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORESACTIVE; ++i)
  return allStall;
} // bool gc_checkAllCoreStatus_I()
inline void checkMarkStatue() {
  GC_BAMBOO_DEBUGPRINT(0xee01);
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    GC_BAMBOO_DEBUGPRINT(0xee02);
    int entry_index = 0;
    if(waitconfirm) {
      // phase 2
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    GC_BAMBOO_DEBUGPRINT(0xee03);
    if(allStall) {
      GC_BAMBOO_DEBUGPRINT(0xee04);
      if(!waitconfirm) {
        GC_BAMBOO_DEBUGPRINT(0xee05);
        // the first time found all cores stall
        // send out status confirm msg to all other cores
        // reset the corestatus array too
        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        for(i = 1; i < NUMCORESACTIVE; ++i) {
          gccorestatus[i] = 1;
          // send mark phase finish confirm request msg to core i
          send_msg_1(i, GCMARKCONFIRM, false);
        } // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
        // check if the sums of sent objs and received objs are the same
        // yes->check if the info is the latest; no->go on executing
        int sumsendobj = 0;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee06);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee07);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        if(0 == sumsendobj) {
          // check if there are changes of the numsendobjs or numreceiveobjs
          // on each core between the two phases
          bool ischanged = false;
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
              ischanged = true;
              break;
            }
          } // for(i = 0; i < NUMCORESACTIVE; ++i)
          GC_BAMBOO_DEBUGPRINT(0xee08);
          GC_BAMBOO_DEBUGPRINT_REG(ischanged);
          if(!ischanged) {
            GC_BAMBOO_DEBUGPRINT(0xee09);
            // all the core status info is the latest
            // stop the mark phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(i = 0; i < NUMCORESACTIVE; ++i) {
              gccorestatus[i] = 1;
            } // for(i = 0; i < NUMCORESACTIVE; ++i)
          } else {
            // the sampled info is not the latest, flip the entries
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          } // if(!ischanged)
        } else {
          // There were changes between phase 1 and phase 2, cannot decide
          // whether the mark phase has been finished
          waitconfirm = false;
          // As it fails in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } // if(0 == sumsendobj) else ...
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!waitconfirm) else ...
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } // if(allStall)
  } // if((!waitconfirm)...
  GC_BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
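/* NOTE (summary, inferred from the code above): mark termination uses a
 * two-round snapshot. Round 1 samples the per-core send/receive counters
 * into gcnumsendobjs/gcnumreceiveobjs[gcnumsrobjs_index]; round 2, after a
 * GCMARKCONFIRM round trip, samples into the other entry. The mark phase is
 * declared finished only if sum(send) - sum(receive) == 0 AND the two
 * snapshots are identical, i.e. no marking message was still in flight
 * between the two rounds. */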
inline void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcloads[i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
      gcstopblock[i] = 0;
    } // for(i = 0; i < NUMCORES4GC; ++i)
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
    gcheaptop = 0;
    gctopcore = 0;
    gctopblock = 0;
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gctomove = false;
  gcmovestartaddr = 0;
  gcblock2fill = 0;
  gccurr_heaptop = 0;
  gcdstcore = 0;

  // initialize the mark queue
  if (gchead==NULL) {
    gcheadindex=gctailindex=gctailindex2 = 0;
    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if (gclobjhead==NULL) {
    gclobjheadindex = 0;
    gclobjtailindex = 0;
    gclobjtailindex2 = 0;
    gclobjhead=gclobjtail=gclobjtail2=
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

#ifdef LOCALHASHTBL_TEST
  freeRuntimeHash(gcpointertbl);
  gcpointertbl = allocateRuntimeHash(20);
#else
  mgchashreset(gcpointertbl);
#endif

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);

  // initialize the mapping info related structures
  if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
    // Never free the shared hash table, just reset it
    mgcsharedhashReset(gcsharedptbl);
  }
#ifdef GC_PROFILE
  gc_num_livespace = 0;
  gc_num_freespace = 0;
  gc_num_lobjspace = 0;
  gc_num_forwardobj = 0;
  gc_num_profiles = NUMCORESACTIVE - 1;
#endif
} // void initGC()
// compute load balance for all cores
inline int loadbalance(int * heaptop) {
  // compute load balance
  int i;

  // get the total loads
  int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  GC_BAMBOO_DEBUGPRINT(0xdddd);
  GC_BAMBOO_DEBUGPRINT_REG(tloads);
  GC_BAMBOO_DEBUGPRINT_REG(*heaptop);
  int b = 0;
  BLOCKINDEX(*heaptop, &b);
  int numbpc = b / NUMCORES4GC; // num of blocks per core
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(numbpc);
  gctopblock = b;
  RESIDECORE(*heaptop, &gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  return numbpc;
} // int loadbalance(int * heaptop)
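/* Illustrative example (assumed numbers): with NUMCORES4GC == 4 and total
 * compacted loads summing to 10 blocks' worth of bytes, *heaptop lands at
 * gcbaseva + tloads, BLOCKINDEX maps that address to block number b, and
 * numbpc = b / NUMCORES4GC leaves each core owning roughly 2-3 blocks after
 * compaction. */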
inline bool cacheLObjs() {
  // check the total mem size needed for large objs
  unsigned long long sumsize = 0;
  int size = 0;
  GC_BAMBOO_DEBUGPRINT(0xe801);
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  int tmp_lobj = 0;
  int tmp_len = 0;
  int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  // TODO USE QUICK SORT INSTEAD?
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    GC_BAMBOO_DEBUGPRINT_REG(tmp_len);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        } // if(tmp_block->lobjs[i-1] > tmp_lobj)
      } // if(i == 0) else ...
    } // while(true)
    // insert it
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  } // while(gc_lobjmoreItems2())

#ifdef GC_PROFILE
  gc_num_lobjspace = sumsize;
#endif
  // check if there is enough space to cache these large objs
  INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    GC_BAMBOO_DEBUGPRINT(0xe802);
    GC_BAMBOO_DEBUGPRINT_REG(dst);
    GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  GC_BAMBOO_DEBUGPRINT(0xe803);
  GC_BAMBOO_DEBUGPRINT_REG(dst);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the largeObjs to the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
    dst -= size;
    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    GC_BAMBOO_DEBUGPRINT(0x804);
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    GC_BAMBOO_DEBUGPRINT(dst);
    GC_BAMBOO_DEBUGPRINT_REG(size);
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  }
  return true;
} // bool cacheLObjs()
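/* NOTE (summary, inferred from the code above): cacheLObjs() stages every
 * large object at the very top of the shared heap, copying from the highest
 * address downwards (dst -= size before each copy) so that moveLObjs() can
 * later replay them upwards starting from gcheaptop. memmove is used
 * whenever the source and destination ranges may overlap, memcpy otherwise. */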
// update the bamboo_smemtbl to record current shared mem usage
void updateSmemTbl(int coren,
                   int localtop) {
  int ltopcore = 0;
  int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
    bound = BAMBOO_SMEM_SIZE;
  }
  int load = (localtop-gcbaseva)%bound;
  int i = 0;
  int j = 0;
  int toset = 0;
  do {
    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
    if(toset < ltopcore) {
      bamboo_smemtbl[toset]=
        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
    } else if(toset == ltopcore) {
      bamboo_smemtbl[toset] = load;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
      break;
    } else {
      break;
    }
    i++;
    if(i == 2) {
      i = 0;
      j++;
    }
  } while(true);
} // void updateSmemTbl(int, int)
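/* Illustrative example: bamboo_smemtbl[block] records how many bytes of a
 * block are in use. Every block of this core strictly below the block
 * holding localtop gets its full capacity (BAMBOO_SMEM_SIZE_L for the first
 * NUMCORES4GC blocks, BAMBOO_SMEM_SIZE afterwards), the block holding
 * localtop gets the partial load = (localtop-gcbaseva)%bound, and blocks
 * above it stay 0. */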
inline void moveLObjs() {
  GC_BAMBOO_DEBUGPRINT(0xea01);
#ifdef SMEMM
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
#endif
  // zero out the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // find current heap top
  // flush all gcloads to indicate the real heap top on one core
  // previously it represented the next available ptr on a core
  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1 ?
                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  GC_BAMBOO_DEBUGPRINT(0xea02);
  GC_BAMBOO_DEBUGPRINT_REG(gcloads[0]);
  GC_BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+i);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
    if((gcfilledblocks[i] > 0)
       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
  } // for(int i = 1; i < NUMCORES4GC; i++)

  // find current heap top
  // TODO
  // a bug here: when using local allocation, directly move large objects
  // to the highest free chunk might not be memory efficient
  int tmpheaptop = 0;
  int size = 0;
  int bound = 0;
  int i = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
                 (BAMBOO_SMEM_SIZE_L*i) :
                 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }

  // move large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
#ifdef SMEMM
  gcmem_mixed_usedmem += tomove;
#endif
  GC_BAMBOO_DEBUGPRINT(0xea03);
  GC_BAMBOO_DEBUGPRINT_REG(tomove);
  GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
  // flush the sbstartbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks it crosses
    int remain = tmpheaptop-gcbaseva;
    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; //number of the sblock
    int b = 0;  // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // check the remaining space in this block
    bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;

    GC_BAMBOO_DEBUGPRINT(0xea04);
    int ptr = 0;
    int host = 0;
    int isize = 0;
    int base = tmpheaptop;
    int cpysize = 0;
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain < isize) {
        // this object crosses blocks
        // close the current block, fill its header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
        bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; //add the size of header
        cpysize = 0;
        base = tmpheaptop;
        if(remain == 0) {
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
        BLOCKINDEX(tmpheaptop, &b);
        sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;

        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea05);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        GC_BAMBOO_DEBUGPRINT_REG(base);

        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xcdca);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          GC_BAMBOO_DEBUGPRINT(0xcdcb);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        gcheaptop += size;
        tmpheaptop += isize;
        // set the gcsbstarttbl and bamboo_smemtbl
        int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          // on the boundary of a block
          gcsbstarttbl[sb] = (INTPTR)(-1);
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...

        // close the current block and fill the header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
        cpysize = 0;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      } else {
        // this object fits into the current block
        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea06);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        cpysize += isize;
        remain -= isize;

        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xcdcc);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          GC_BAMBOO_DEBUGPRINT(0xcdcd);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        gcheaptop += size;
        tmpheaptop += isize;
        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } // if(remain < isize) else ...
    } // while(gc_lobjmoreItems())

    // close the last block, fill the header
    BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
    *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
    bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header

    tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;

    gcheaptop = tmpheaptop;
  } // if(tomove == 0) else ...

  GC_BAMBOO_DEBUGPRINT(0xea07);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  bamboo_free_block = 0;
  int tbound = 0;
  do {
    tbound = (bamboo_free_block<NUMCORES4GC) ?
             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  } while(true);

#ifdef GC_PROFILE
  // check how much live space there is
  gc_num_livespace = 0;
  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
    gc_num_livespace += bamboo_smemtbl[tmpi];
  }
  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
#endif
  GC_BAMBOO_DEBUGPRINT(0xea08);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
} // void moveLObjs()
inline void markObj(void * objptr) {
  if(objptr == NULL) {
    return;
  }
  if(ISSHAREDOBJ(objptr)) {
    int host = hostcore(objptr);
    if(BAMBOO_NUM_OF_CORE == host) {
      // a local obj on this core
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(((int *)objptr)[6] == INIT) {
        // this is the first time that this object is discovered,
        // set the flag as DISCOVERED
        ((int *)objptr)[6] |= DISCOVERED;
        BAMBOO_CACHE_FLUSH_LINE(objptr);
        gc_enqueue_I(objptr);
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      GC_BAMBOO_DEBUGPRINT(0xbbbb);
      GC_BAMBOO_DEBUGPRINT_REG(host);
      GC_BAMBOO_DEBUGPRINT_REG(objptr);
      // check if this obj has been forwarded
      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
        // send a msg to the host informing that objptr is active
        send_msg_2(host, GCMARKEDOBJ, objptr, false);
#ifdef GC_PROFILE
        gc_num_forwardobj++;
#endif // GC_PROFILE
        gcself_numsendobjs++;
        MGCHashadd(gcforwardobjtbl, (int)objptr);
      }
    } // if(BAMBOO_NUM_OF_CORE == host) else ...
  } else {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gc_enqueue_I(objptr);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // if(ISSHAREDOBJ(objptr))
} // void markObj(void * objptr)
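/* NOTE (summary, inferred from the code above): marking is owner-based. A
 * shared object hosted on this core is flagged DISCOVERED and pushed onto
 * the local mark queue; a remotely hosted object triggers a single
 * GCMARKEDOBJ message to its host core, deduplicated through
 * gcforwardobjtbl so that each remote object is forwarded at most once per
 * collection. */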
// enqueue root objs
inline void tomark(struct garbagelist * stackptr) {
  if(MARKPHASE != gcphase) {
    GC_BAMBOO_DEBUGPRINT_REG(gcphase);
    BAMBOO_EXIT(0xb001);
  }
  gcbusystatus = true;

  int i,j;
  // enqueue current stack
  while(stackptr!=NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe501);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        markObj(stackptr->array[i]);
      }
    }
    stackptr=stackptr->next;
  }

  GC_BAMBOO_DEBUGPRINT(0xe503);
  // enqueue objectsets
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          markObj((void *)ptr->key);
          ptr=ptr->next;
        }
      }
    }
  }

  // enqueue current task descriptor
  if(currtpd != NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe504);
    for(i=0; i<currtpd->numParameters; i++) {
      markObj(currtpd->parameterArray[i]);
    }
  }

  GC_BAMBOO_DEBUGPRINT(0xe505);
  // enqueue active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      for(i=0; i<tpd->numParameters; i++) {
        markObj(tpd->parameterArray[i]);
      }
      ptr=ptr->next;
    }
  }

  GC_BAMBOO_DEBUGPRINT(0xe506);
  // enqueue cached transferred obj
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    markObj(objInfo->objptr);
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  GC_BAMBOO_DEBUGPRINT(0xe507);
  // enqueue cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    markObj(totransobj->objptr);
    item = getNextQueueItem(item);
  } // while(item != NULL)

  GC_BAMBOO_DEBUGPRINT(0xe508);
  // enqueue lock related info
  for(i = 0; i < runtime_locklen; ++i) {
    markObj((void *)(runtime_locks[i].redirectlock));
    if(runtime_locks[i].value != NULL) {
      markObj((void *)(runtime_locks[i].value));
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe509);

  // enqueue the global thread queue
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
    if(thread_counter > 0) {
      unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
      for(i = thread_counter; i > 0; i--) {
        markObj((void *)bamboo_thread_queue[4+start]);
        start = (start+1)&bamboo_max_thread_num_mask;
      }
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe50a);
} // void tomark(struct garbagelist * stackptr)
inline void mark(bool isfirst,
                 struct garbagelist * stackptr) {
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed01);
  if(isfirst) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed02);
    // enqueue root objs
    tomark(stackptr);
    gccurr_heaptop = 0; // record the size of all active objs in this core
                        // aligned but does not consider block boundaries
    gcmarkedptrbound = 0;
  }
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed03);

  bool checkfield = true;
  bool sendStall = false;

  while(MARKPHASE == gcphase) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed04);
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      bool hasItems = gc_moreItems2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xed05);
      if(!hasItems) {
        break;
      }
      sendStall = false;
      gcbusystatus = true;
      checkfield = true;
      void * ptr = gc_dequeue2_I();

      GC_BAMBOO_DEBUGPRINT_REG(ptr);
      int size = 0;
      int isize = 0;
      int type = 0;
      // check if it is a shared obj
      if(ISSHAREDOBJ(ptr)) {
        // a shared obj, check if it is a local obj on this core
        int host = hostcore(ptr);
        bool islocal = (host == BAMBOO_NUM_OF_CORE);

        bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
        if(isLarge(ptr, &type, &size) && isnotmarked) {
          // ptr is a large object and not marked or enqueued
          GC_BAMBOO_DEBUGPRINT(0xecec);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
          BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
          gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
          BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
          // mark this obj
          ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
          BAMBOO_CACHE_FLUSH_LINE(ptr);
        } else if(isnotmarked) {
          // ptr is an unmarked active object on this core
          ALIGNSIZE(size, &isize);
          gccurr_heaptop += isize;
          GC_BAMBOO_DEBUGPRINT(0xaaaa);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(isize);
          GC_BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
          // mark this obj
          ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
          BAMBOO_CACHE_FLUSH_LINE(ptr);

          if(ptr + size > gcmarkedptrbound) {
            gcmarkedptrbound = ptr + size;
          } // if(ptr + size > gcmarkedptrbound)
        } else {
          // ptr is not an active obj or has been marked
          checkfield = false;
        } // if(isLarge(ptr, &type, &size)) else ...
      } /* can never reach here
        // check if this obj has been forwarded
        if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
          // send a msg to host informing that ptr is active
          send_msg_2(host, GCMARKEDOBJ, ptr, false);
          gcself_numsendobjs++;
          MGCHashadd(gcforwardobjtbl, (int)ptr);
        }
      }// if(isLocal(ptr)) else ...*/
      // if(ISSHAREDOBJ(ptr))
      GC_BAMBOO_DEBUGPRINT(0xed06);

      if(checkfield) {
        // scan all pointers in ptr
        unsigned INTPTR * pointer;
        pointer=pointerarray[type];
        if (pointer==0) {
          /* Array of primitives */
          /* Do nothing */
        } else if (((INTPTR)pointer)==1) {
          /* Array of pointers */
          struct ArrayObject *ao=(struct ArrayObject *) ptr;
          int length=ao->___length___;
          int j;
          for(j=0; j<length; j++) {
            void *objptr =
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
            markObj(objptr);
          }
        } else {
          INTPTR size=pointer[0];
          int i;
          for(i=1; i<=size; i++) {
            unsigned int offset=pointer[i];
            void * objptr=*((void **)(((char *)ptr)+offset));
            markObj(objptr);
          }
        } // if (pointer==0) else if ... else ...
      } // if(checkfield)
    } // while(gc_moreItems2())
    GC_BAMBOO_DEBUGPRINT(0xed07);
    gcbusystatus = false;
    // send mark finish msg to core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
      GC_BAMBOO_DEBUGPRINT(0xed08);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
      gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
      gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
        gcself_numreceiveobjs;
      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
    } else {
      if(!sendStall) {
        GC_BAMBOO_DEBUGPRINT(0xed09);
        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
                   gcself_numsendobjs, gcself_numreceiveobjs, false);
        sendStall = true;
      }
    } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
    GC_BAMBOO_DEBUGPRINT(0xed0a);

    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
      GC_BAMBOO_DEBUGPRINT(0xed0b);
      checkMarkStatue();
    } // if(BAMBOO_NUM_OF_CORE == STARTUPCORE)
  } // while(MARKPHASE == gcphase)
} // void mark(bool isfirst, struct garbagelist * stackptr)
inline void compact2Heaptophelper_I(int coren,
                                    int* p,
                                    int* numblocks,
                                    int* remain) {
  int b = 0;
  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
  if(STARTUPCORE == coren) {
    gctomove = true;
    gcmovestartaddr = *p;
    gcdstcore = gctopcore;
    gcblock2fill = *numblocks + 1;
  } else {
    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
  }
  GC_BAMBOO_DEBUGPRINT_REG(coren);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(*p);
  GC_BAMBOO_DEBUGPRINT_REG(*numblocks+1);
  if(memneed < *remain) {
    GC_BAMBOO_DEBUGPRINT(0xd104);
    *p = *p + memneed;
    gcrequiredmems[coren] = 0;
    gcloads[gctopcore] += memneed;
    *remain = *remain - memneed;
  } else {
    GC_BAMBOO_DEBUGPRINT(0xd105);
    // next available block
    gcfilledblocks[gctopcore] += 1;
    int newbase = 0;
    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
    gcloads[gctopcore] = newbase;
    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
    gcstopblock[gctopcore]++;
    gctopcore = NEXTTOPCORE(gctopblock);
    gctopblock++;
    *numblocks = gcstopblock[gctopcore];
    *p = gcloads[gctopcore];
    BLOCKINDEX(*p, &b);
    *remain=(b<NUMCORES4GC) ?
             ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
            : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
    GC_BAMBOO_DEBUGPRINT(0xd106);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    GC_BAMBOO_DEBUGPRINT_REG(*p);
    GC_BAMBOO_DEBUGPRINT_REG(b);
    GC_BAMBOO_DEBUGPRINT_REG(*remain);
  } // if(memneed < remain)
} // void compact2Heaptophelper_I(int, int*, int*, int*)
inline void compact2Heaptop() {
  // no cores with spare mem and some cores are blocked with pending move
  // find the current heap top and make them move to the heap top
  int p;
  int numblocks = gcfilledblocks[gctopcore];
  p = gcloads[gctopcore];
  int b = 0;
  BLOCKINDEX(p, &b);
  int remain = (b<NUMCORES4GC) ?
               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
              : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
  // check if the top core has finished
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  if(gccorestatus[gctopcore] != 0) {
    GC_BAMBOO_DEBUGPRINT(0xd101);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    // let the top core finish its own work first
    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    return;
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

  GC_BAMBOO_DEBUGPRINT(0xd102);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(p);
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(remain);
  for(int i = 0; i < NUMCORES4GC; i++) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
      GC_BAMBOO_DEBUGPRINT(0xd103);
      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
      if(gccorestatus[gctopcore] != 0) {
        GC_BAMBOO_DEBUGPRINT(0xd101);
        GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        // the top core is not free now
        return;
      }
    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xd106);
} // void compact2Heaptop()
inline void resolvePendingMoveRequest() {
  GC_BAMBOO_DEBUGPRINT(0xeb01);
  GC_BAMBOO_DEBUGPRINT(0xeeee);
  for(int k = 0; k < NUMCORES4GC; k++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+k);
    GC_BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
  }
  GC_BAMBOO_DEBUGPRINT(0xffff);

  int i, j;
  bool nosparemem = true;
  bool haspending = false;
  bool hasrunning = false;
  bool noblock = false;
  int dstcore = 0;    // the core that needs spare mem
  int sourcecore = 0; // the core that has spare mem
  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
    if(nosparemem) {
      // check if there are cores with spare mem
      if(gccorestatus[i] == 0) {
        // finished working, check if it still has spare mem
        if(gcfilledblocks[i] < gcstopblock[i]) {
          // still have spare mem
          nosparemem = false;
          sourcecore = i;
        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
      }
      i++;
    } // if(nosparemem)
    if(!haspending) {
      if(gccorestatus[j] != 0) {
        // not finished, check if it has pending move requests
        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
          dstcore = j;
          haspending = true;
        } else {
          hasrunning = true;
        } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
      } // if(gccorestatus[i] == 0) else ...
      j++;
    } // if(!haspending)
    if(!nosparemem && haspending) {
      // find a match between a pending request and spare mem
      int tomove = 0;
      int startaddr = 0;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
                                                 gcrequiredmems[dstcore],
                                                 &tomove,
                                                 &startaddr);
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xeb02);
      GC_BAMBOO_DEBUGPRINT_REG(sourcecore);
      GC_BAMBOO_DEBUGPRINT_REG(dstcore);
      GC_BAMBOO_DEBUGPRINT_REG(startaddr);
      GC_BAMBOO_DEBUGPRINT_REG(tomove);
      if(STARTUPCORE == dstcore) {
        GC_BAMBOO_DEBUGPRINT(0xeb03);
        gcdstcore = sourcecore;
        gctomove = true;
        gcmovestartaddr = startaddr;
        gcblock2fill = tomove;
      } else {
        GC_BAMBOO_DEBUGPRINT(0xeb04);
        send_msg_4(dstcore, GCMOVESTART, sourcecore,
                   startaddr, tomove, false);
      } // if(STARTUPCORE == dstcore)
      gcmovepending--;
      nosparemem = true;
      haspending = false;
      noblock = true;
    } // if(!nosparemem && haspending)
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xcccc);
  GC_BAMBOO_DEBUGPRINT_REG(hasrunning);
  GC_BAMBOO_DEBUGPRINT_REG(haspending);
  GC_BAMBOO_DEBUGPRINT_REG(noblock);

  if(!hasrunning && !noblock) {
    gcphase = SUBTLECOMPACTPHASE;
    compact2Heaptop();
  } // if(!hasrunning && !noblock)
} // void resolvePendingMoveRequest()
struct moveHelper {
  int numblocks;       // block num for heap
  INTPTR base;         // base virtual address of current heap block
  INTPTR ptr;          // virtual address of current heap top
  int offset;          // offset in current heap block
  int blockbase;       // virtual address of current small block to check
  int blockbound;      // bound virtual address of current small block
  int sblockindex;     // index of the small blocks
  int top;             // real size of current heap block to check
  int bound;           // bound size of current heap block to check
}; // struct moveHelper
// If out of boundary of valid shared memory, return false, else return true
inline bool nextSBlock(struct moveHelper * orig) {
  orig->blockbase = orig->blockbound;

  bool sbchanged = false;
  INTPTR origptr = orig->ptr;
  int blockbase = orig->blockbase;
  int blockbound = orig->blockbound;
  int bound = orig->bound;
  GC_BAMBOO_DEBUGPRINT(0xecc0);
  GC_BAMBOO_DEBUGPRINT_REG(blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(bound);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
outernextSBlock:
  // check if we crossed a big block boundary
  // TODO now do not zero out the whole memory, maybe the last two conditions
  // are not necessary any more
  if((blockbase>=bound)||(origptr>=bound)
     ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
innernextSBlock:
    // end of current heap block, jump to next one
    orig->numblocks += 1;
    GC_BAMBOO_DEBUGPRINT(0xecc1);
    GC_BAMBOO_DEBUGPRINT_REG(orig->numblocks);
    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
    GC_BAMBOO_DEBUGPRINT(orig->base);
    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
      // out of boundary
      orig->ptr = orig->base; // set current ptr to out of boundary too
      return false;
    }
    orig->blockbase = orig->base;
    orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
    sbchanged = true;
    int blocknum = 0;
    BLOCKINDEX(orig->base, &blocknum);
    if(bamboo_smemtbl[blocknum] == 0) {
      // goto the next block
      goto innernextSBlock;
    }
    // check the bamboo_smemtbl to decide the real bound
    orig->bound = orig->base + bamboo_smemtbl[blocknum];
  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
    orig->sblockindex += 1;
    sbchanged = true;
  } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...

  // check if this sblock should be skipped or have special start point
  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    // goto next sblock
    GC_BAMBOO_DEBUGPRINT(0xecc2);
    orig->sblockindex += 1;
    orig->blockbase += BAMBOO_SMEM_SIZE;
    goto outernextSBlock;
  } else if((sbstart != 0) && (sbchanged)) {
    // the first time to access this SBlock
    GC_BAMBOO_DEBUGPRINT(0xecc3);
    // not start from the very beginning
    orig->blockbase = sbstart;
  } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...

  // setup information for this sblock
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xecc4);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->offset);
  if(orig->ptr >= orig->bound) {
    // met a lobj, move to next block
    goto innernextSBlock;
  }

  return true;
} // bool nextSBlock(struct moveHelper * orig)
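/* NOTE (summary, inferred from the code above): the scan distinguishes
 * per-core big blocks (whose live extent comes from bamboo_smemtbl) from
 * small blocks (sblocks) of BAMBOO_SMEM_SIZE each, with
 * sblockindex = (addr-gcbaseva)/BAMBOO_SMEM_SIZE. In gcsbstarttbl, -1 means
 * the sblock lies wholly inside a large object and is skipped, a nonzero
 * entry gives the first valid object address inside the sblock, and 0 means
 * scanning starts from the sblock base. */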
// return false if there are no available data to compact
inline bool initOrig_Dst(struct moveHelper * orig,
                         struct moveHelper * to) {
  // init the dst ptr
  to->numblocks = 0;
  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->bound = BAMBOO_SMEM_SIZE_L;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));

  GC_BAMBOO_DEBUGPRINT(0xef01);
  GC_BAMBOO_DEBUGPRINT_REG(to->base);
  INTPTR tobase = to->base;
  to->ptr = tobase + to->offset;
#ifdef GC_CACHE_ADAPT
  // initialize the gc_cache_revise_information
  gc_cache_revise_infomation.to_page_start_va = to->ptr;
  int toindex = (tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
    (toindex+1)+gcbaseva;
  gc_cache_revise_infomation.to_page_index = toindex;
  gc_cache_revise_infomation.orig_page_start_va = -1;
#endif // GC_CACHE_ADAPT

  // init the orig ptr
  orig->numblocks = 0;
  orig->base = tobase;
  int blocknum = 0;
  BLOCKINDEX(orig->base, &blocknum);
  INTPTR origbase = orig->base;
  // check the bamboo_smemtbl to decide the real bound
  orig->bound = origbase + bamboo_smemtbl[blocknum];
  orig->blockbase = origbase;
  orig->sblockindex = (origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
  GC_BAMBOO_DEBUGPRINT(0xef02);
  GC_BAMBOO_DEBUGPRINT_REG(origbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);

  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    GC_BAMBOO_DEBUGPRINT(0xef03);
    // goto next sblock
    orig->blockbound =
      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
    return nextSBlock(orig);
  } else if(sbstart != 0) {
    GC_BAMBOO_DEBUGPRINT(0xef04);
    orig->blockbase = sbstart;
  }
  GC_BAMBOO_DEBUGPRINT(0xef05);
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xef06);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);

  return true;
} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
inline void nextBlock(struct moveHelper * to) {
  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
  to->bound += BAMBOO_SMEM_SIZE;
  to->numblocks += 1;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
  to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->ptr = to->base + to->offset;
} // void nextBlock(struct moveHelper * to)
#ifdef GC_CACHE_ADAPT
inline void samplingDataConvert(int current_ptr) {
  unsigned int tmp_factor =
    current_ptr-gc_cache_revise_infomation.to_page_start_va;
  int topage=gc_cache_revise_infomation.to_page_index;
  int oldpage = gc_cache_revise_infomation.orig_page_index;
  unsigned int * newtable=&gccachesamplingtbl_r[topage];
  unsigned int * oldtable=&gccachesamplingtbl[oldpage];

  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
    (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
    newtable=(unsigned int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
    oldtable=(unsigned int*)(((char *)oldtable)+size_cachesamplingtbl_local);
  }
} // inline void samplingDataConvert(int)
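/* Illustrative formula (inferred from the code above): each orig page's
 * per-core access counts are folded into the destination page weighted by
 * the number of bytes it contributed to the to-page:
 *   new[core] += old[core] * (current_ptr - to_page_start_va)
 * The strided pointer walk advances one per-core row per iteration through
 * the flat sampling tables. */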
inline void completePageConvert(struct moveHelper * orig,
                                struct moveHelper * to,
                                int current_ptr,
                                bool closeToPage) {
  int ptr = 0;
  int tocompare = 0;
  if(closeToPage) {
    ptr = to->ptr;
    tocompare = gc_cache_revise_infomation.to_page_end_va;
  } else {
    ptr = orig->ptr;
    tocompare = gc_cache_revise_infomation.orig_page_end_va;
  }
  if(ptr >= tocompare) {
    // end of an orig/to page
    // compute the impact of this page for the new page
    samplingDataConvert(current_ptr);
    // prepare for a new orig page
    int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*(tmp_index+1);
    gc_cache_revise_infomation.orig_page_index = tmp_index;
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
    gc_cache_revise_infomation.to_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
    gc_cache_revise_infomation.to_page_index =
      (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
  }
} // inline void completePageConvert(...)
#endif // GC_CACHE_ADAPT
// endaddr does not contain spaces for headers
inline bool moveobj(struct moveHelper * orig,
                    struct moveHelper * to,
                    int stopblock) {
  if(stopblock == 0) {
    return true;
  }

  GC_BAMBOO_DEBUGPRINT(0xe201);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(to->ptr);

  int type = 0;
  int size = 0;
  int mark = 0;
  int isize = 0;
innermoveobj:
  while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
    orig->ptr = (int*)(orig->ptr) + 1;
  }
#ifdef GC_CACHE_ADAPT
  completePageConvert(orig, to, to->ptr, false);
#endif
  INTPTR origptr = orig->ptr;
  int origbound = orig->bound;
  int origblockbound = orig->blockbound;
  if((origptr >= origbound) || (origptr == origblockbound)) {
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  }
  GC_BAMBOO_DEBUGPRINT(0xe202);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT(((int *)(origptr))[0]);
  // check the obj's type, size and mark flag
  type = ((int *)(origptr))[0];
  size = 0;
  if(type == 0) {
    // end of this block, go to next one
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  } else if(type < NUMCLASSES) {
    // a normal obj
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)(origptr);
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  }
  mark = ((int *)(origptr))[6];
  bool isremote = ((((int *)(origptr))[6] & REMOTEM) != 0);
  GC_BAMBOO_DEBUGPRINT(0xe203);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  ALIGNSIZE(size, &isize); // no matter whether the obj is marked or not,
                           // we should be able to step across it
  if((mark & MARKED) != 0) {
    int totop = to->top;
    int tobound = to->bound;
    GC_BAMBOO_DEBUGPRINT(0xe204);
    // marked obj, copy it to current heap top
    // check to see if remaining space is enough
    if(totop + isize > tobound) {
      // fill 0 indicating the end of this block
      BAMBOO_MEMSET_WH(to->ptr, '\0', tobound - totop);
      // fill the header of this block and then go to next block
      to->offset += tobound - totop;
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
#ifdef GC_CACHE_ADAPT
      int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
      nextBlock(to);
#ifdef GC_CACHE_ADAPT
      completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
      if(stopblock == to->numblocks) {
        // already fulfilled the block
        return true;
      } // if(stopblock == to->numblocks)
    } // if(to->top + isize > to->bound)
    // set the mark field to 2 (COMPACTED), indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(origptr))[6] = COMPACTED;
    INTPTR toptr = to->ptr;
    if(toptr != origptr) {
      if((int)(origptr) < (int)(toptr)+size) {
        memmove(toptr, origptr, size);
      } else {
        memcpy(toptr, origptr, size);
      }
      // fill the remaining space with -2
      BAMBOO_MEMSET_WH(toptr+size, -2, isize-size);
    }
    // store mapping info
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
    RuntimeHashadd_I(gcpointertbl, origptr, toptr);
#else
    mgchashInsert_I(gcpointertbl, origptr, toptr);
#endif
    if(isremote) {
      // add to the sharedptbl
      if(gcsharedptbl != NULL) {
        mgcsharedhashInsert_I(gcsharedptbl, origptr, toptr);
      }
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    GC_BAMBOO_DEBUGPRINT(0xcdce);
    GC_BAMBOO_DEBUGPRINT_REG(origptr);
    GC_BAMBOO_DEBUGPRINT_REG(toptr);
    GC_BAMBOO_DEBUGPRINT_REG(isize);
    gccurr_heaptop -= isize;
    to->ptr += isize;
    to->offset += isize;
    to->top += isize;
#ifdef GC_CACHE_ADAPT
    int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
    if(to->top == to->bound) {
      // fill the header of this block and then go to next block
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
      nextBlock(to);
    }
#ifdef GC_CACHE_ADAPT
    completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
    GC_BAMBOO_DEBUGPRINT(0xe205);
  } // if((mark & MARKED) != 0)
  // move to the next obj
  orig->ptr += isize;

  GC_BAMBOO_DEBUGPRINT_REG(isize);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
    GC_BAMBOO_DEBUGPRINT(0xe206);
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe207);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  return false;
} //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int stopblock)
// should be invoked with interrupts disabled
inline int assignSpareMem_I(int sourcecore,
                            int requiredmem,
                            int * tomove,
                            int * startaddr) {
  int b = 0;
  BLOCKINDEX(gcloads[sourcecore], &b);
  int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
       : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
  int remain = boundptr - gcloads[sourcecore];
  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
  *startaddr = gcloads[sourcecore];
  *tomove = gcfilledblocks[sourcecore] + 1;
  if(memneed < remain) {
    gcloads[sourcecore] += memneed;
    return 0;
  } else {
    // next available block
    gcfilledblocks[sourcecore] += 1;
    int newbase = 0;
    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
    gcloads[sourcecore] = newbase;
    return requiredmem-remain;
  }
} // int assignSpareMem_I(int ,int * , int * , int * )
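/* Illustrative example (assumed numbers): suppose the source core's next
 * free byte gcloads[src] sits 3K below its block bound and the requester
 * needs 2K. Then memneed = 2K + BAMBOO_CACHE_LINE_SIZE fits into
 * remain = 3K, so the spare range starts at the old gcloads[src], the load
 * advances by memneed, and 0 is returned (nothing left outstanding). If
 * memneed exceeded remain, the whole tail of the block would be granted and
 * requiredmem-remain returned as the amount still pending. */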
// should be invoked with interrupts disabled
inline bool gcfindSpareMem_I(int * startaddr,
                             int * tomove,
                             int * dstcore,
                             int requiredmem,
                             int requiredcore) {
  for(int k = 0; k < NUMCORES4GC; k++) {
    if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
      // check if this stopped core has enough mem
      assignSpareMem_I(k, requiredmem, tomove, startaddr);
      *dstcore = k;
      return true;
    }
  }
  // if no spare mem can be found right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  gcmovepending++;
  return false;
} //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
inline bool compacthelper(struct moveHelper * orig,
                          struct moveHelper * to,
                          int * filledblocks,
                          INTPTR * heaptopptr,
                          bool * localcompact) {
  // scan over all objs in this block, compact the marked objs
  // the loop stops when either all active objs have been scanned or
  // the gcstopblock has been fulfilled
  GC_BAMBOO_DEBUGPRINT(0xe101);
  GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
  GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
innercompact:
  while(orig->ptr < gcmarkedptrbound) {
    bool stop = moveobj(orig, to, gcblock2fill);
    if(stop) {
      break;
    }
  }
#ifdef GC_CACHE_ADAPT
  // end of a to-page, wrap up its information
  samplingDataConvert(to->ptr);
#endif // GC_CACHE_ADAPT
  // if no objs have been compacted, do nothing,
  // otherwise, fill the header of this block
  if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
    BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
    (*((int*)(to->base))) = to->offset;
  } else {
    to->offset = 0;
    to->ptr = to->base;
    to->top -= BAMBOO_CACHE_LINE_SIZE;
  } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...

  *heaptopptr = to->ptr;
  *filledblocks = to->numblocks;

  GC_BAMBOO_DEBUGPRINT(0xe102);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
  GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
  GC_BAMBOO_DEBUGPRINT_REG(*filledblocks);
  GC_BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);

  // send msgs to core coordinator indicating that the compact is finishing
  // send compact finish message to core coordinator
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
    gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
    if(orig->ptr < gcmarkedptrbound) {
      GC_BAMBOO_DEBUGPRINT(0xe103);
      // ask for more mem
      gctomove = false;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
                          gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
        GC_BAMBOO_DEBUGPRINT(0xe104);
        gctomove = true;
      } else {
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xe105);
        return false;
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      GC_BAMBOO_DEBUGPRINT(0xe106);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    } // if(orig->ptr < gcmarkedptrbound)
  } else {
    if(orig->ptr < gcmarkedptrbound) {
      GC_BAMBOO_DEBUGPRINT(0xe107);
      // ask for more mem
      gctomove = false;
      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
                 *filledblocks, *heaptopptr, gccurr_heaptop, false);
    } else {
      GC_BAMBOO_DEBUGPRINT(0xe108);
      GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
      // finish compacting
      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
                 *filledblocks, *heaptopptr, 0, false);
    }
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)

  if(orig->ptr < gcmarkedptrbound) {
    GC_BAMBOO_DEBUGPRINT(0xe109);
    // still have uncompacted objs
    while(true) {
      if(gctomove) {
        break;
      }
    } // while(!gctomove)
    gctomove = false;
    GC_BAMBOO_DEBUGPRINT(0xe10a);

    to->ptr = gcmovestartaddr;
    to->numblocks = gcblock2fill - 1;
    to->bound = (to->numblocks==0) ?
                BAMBOO_SMEM_SIZE_L :
                BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
    BASEPTR(gcdstcore, to->numblocks, &(to->base));
    to->offset = to->ptr - to->base;
    to->top = (to->numblocks==0) ?
              (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
    to->base = to->ptr;
    to->offset = BAMBOO_CACHE_LINE_SIZE;
    to->ptr += to->offset; // for header
    to->top += to->offset;
    if(gcdstcore == BAMBOO_NUM_OF_CORE) {
      *localcompact = true;
    } else {
      *localcompact = false;
    }
#ifdef GC_CACHE_ADAPT
    // initialize the gc_cache_revise_information
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
    gc_cache_revise_infomation.to_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
    gc_cache_revise_infomation.to_page_index =
      (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
    gc_cache_revise_infomation.orig_page_index =
      (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
#endif // GC_CACHE_ADAPT
    goto innercompact;
  } // if(orig->ptr < gcmarkedptrbound)
  GC_BAMBOO_DEBUGPRINT(0xe10b);
  return true;
} // bool compacthelper(...)
inline void compact() {
  if(COMPACTPHASE != gcphase) {
    BAMBOO_EXIT(0xb002);
  }

  // initialize pointers for compacting
  struct moveHelper * orig =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));

  if(!initOrig_Dst(orig, to)) {
    // no available data to compact;
    // send a compact finish msg to the STARTUP core
    GC_BAMBOO_DEBUGPRINT(0xe001);
    GC_BAMBOO_DEBUGPRINT_REG(to->base);
    send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
               0, to->base, 0, false);
    RUNFREE(orig);
    RUNFREE(to);
    return;
  }
#ifdef GC_CACHE_ADAPT
  gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
  gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
    (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
  gc_cache_revise_infomation.orig_page_index =
    (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
#endif // GC_CACHE_ADAPT

  int filledblocks = 0;
  INTPTR heaptopptr = 0;
  bool localcompact = true;
  compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
  RUNFREE(orig);
  RUNFREE(to);
} // void compact()
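/* flushObj() below resolves a pre-compaction address to its
 * post-compaction address in up to three steps:
 *   1. the local mapping table gcpointertbl;
 *   2. on a miss for a remote object, the owning core's shared table
 *      gcrpointertbls[hostcore(objptr)] (a hit is then cached back
 *      into gcpointertbl);
 *   3. as a last resort, a GCMAPREQUEST message to the host core,
 *      spinning until the reply installs the mapping.
 * A miss for an object hosted on this very core is fatal (0xb003),
 * since the local table must already know every local move. */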
// if this returns NULL, it means that
// 1. objptr is NULL, or
// 2. objptr is not a shared obj;
// in these cases keeping the original value is OK
inline void * flushObj(void * objptr) {
  GC_BAMBOO_DEBUGPRINT(0xe401);
  if(objptr == NULL) {
    return NULL;
  }
  void * dstptr = NULL;
  if(ISSHAREDOBJ(objptr)) {
    GC_BAMBOO_DEBUGPRINT(0xe402);
    GC_BAMBOO_DEBUGPRINT_REG(objptr);
    // a shared obj ptr: change it to the new address
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
    RuntimeHashget(gcpointertbl, objptr, &dstptr);
#else
    dstptr = mgchashSearch(gcpointertbl, objptr);
#endif
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    GC_BAMBOO_DEBUGPRINT_REG(dstptr);

    if(NULL == dstptr) {
      // no mapping info in the local table
      GC_BAMBOO_DEBUGPRINT(0xe403);
      GC_BAMBOO_DEBUGPRINT_REG(objptr);
      GC_BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
      if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
        // error! the obj is right on this core, but we cannot find it
        GC_BAMBOO_DEBUGPRINT_REG(objptr);
        BAMBOO_EXIT(0xb003);
      } else {
        int hostc = hostcore(objptr);
        // check the corresponding sharedptbl
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
        mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
        if(sptbl != NULL) {
          dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
          if(dstptr != NULL) {
#ifdef LOCALHASHTBL_TEST
            RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
#else
            mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
#endif
          }
        }
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

        if(dstptr == NULL) {
          // still can not get the mapping info:
          // send a msg to the host core for the mapping info
          gcobj2map = (int)objptr;
          gcismapped = false;
          // the first time the mapping is required, send a msg to the
          // host core and wait until the mapping info arrives
          send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
                     BAMBOO_NUM_OF_CORE, false);
          while(!gcismapped) {
          }
          BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
          RuntimeHashget(gcpointertbl, objptr, &dstptr);
#else
          dstptr = mgchashSearch(gcpointertbl, objptr);
#endif
          BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        } // if(dstptr == NULL)
      } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
      GC_BAMBOO_DEBUGPRINT_REG(dstptr);
    } // if(NULL == dstptr)
  } // if(ISSHAREDOBJ(objptr))
  // if not a shared obj, return NULL to indicate no need to flush
  GC_BAMBOO_DEBUGPRINT(0xe404);
  return dstptr;
} // void * flushObj(void * objptr)
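/* Every flush site below follows the same pattern; a minimal sketch,
 * kept out of the build, with a hypothetical slot pointer: */
#if 0
static void flush_slot_sketch(void ** slot) {
  // ask flushObj for the post-compaction address of *slot
  void * dst = flushObj(*slot);
  if(dst != NULL) {
    // shared object that moved: install the new address
    *slot = dst;
  }
  // NULL means *slot was NULL or not a shared object: keep it as-is
}
#endif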
inline void flushRuntimeObj(struct garbagelist * stackptr) {
  int i, j;
  // flush current stack
  while(stackptr!=NULL) {
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        void * dst = flushObj(stackptr->array[i]);
        if(dst != NULL) {
          stackptr->array[i] = dst;
        }
      }
    }
    stackptr=stackptr->next;
  }

  // flush the object queues
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          void * dst = flushObj((void *)ptr->key);
          if(dst != NULL) {
            ptr->key = (int)dst;
          }
          ptr=ptr->lnext;
        }
        ObjectHashrehash(set);
      }
    }
  }

  // flush current task descriptor
  if(currtpd != NULL) {
    for(i=0; i<currtpd->numParameters; i++) {
      void * dst = flushObj(currtpd->parameterArray[i]);
      if(dst != NULL) {
        currtpd->parameterArray[i] = dst;
      }
    }
  }

  // flush active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      for(i=0; i<tpd->numParameters; i++) {
        void * dst = flushObj(tpd->parameterArray[i]);
        if(dst != NULL) {
          tpd->parameterArray[i] = dst;
        }
      }
      ptr=ptr->inext;
    }
    genrehash(activetasks);
  }

  // flush cached transferred objs
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    void * dst = flushObj(objInfo->objptr);
    if(dst != NULL) {
      objInfo->objptr = dst;
    }
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  // flush cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    void * dst = flushObj(totransobj->objptr);
    if(dst != NULL) {
      totransobj->objptr = dst;
    }
    item = getNextQueueItem(item);
  } // while(item != NULL)

  // flush lock-related info
  for(i = 0; i < runtime_locklen; ++i) {
    void * dst = flushObj(runtime_locks[i].redirectlock);
    if(dst != NULL) {
      runtime_locks[i].redirectlock = (int)dst;
    }
    if(runtime_locks[i].value != NULL) {
      void * dst=flushObj(runtime_locks[i].value);
      if(dst != NULL) {
        runtime_locks[i].value = (int)dst;
      }
    }
  }

  // flush global thread queue
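  /* The global thread queue is a ring buffer: word 1 holds the element
   * count, word 2 the start index, and the payload entries begin at
   * offset 4.  Advancing with
   *   start = (start+1) & bamboo_max_thread_num_mask;
   * wraps modulo the queue capacity, since the mask is capacity-1 and
   * the capacity is a power of two.  E.g. with a (hypothetical)
   * capacity of 16 (mask 0xf), start 14 and three entries, the slots
   * visited are 14, 15, 0.  The lock acquisition matching the
   * unlockthreadqueue() below is outside this excerpt. */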
  unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
  if(thread_counter > 0) {
    unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
    for(i = thread_counter; i > 0; i--) {
      bamboo_thread_queue[4+start] =
        (INTPTR)(flushObj((void *)bamboo_thread_queue[4+start]));
      start = (start+1)&bamboo_max_thread_num_mask;
    }
  }
  unlockthreadqueue();
} // void flushRuntimeObj(struct garbagelist * stackptr)
inline void transmappinginfo() {
  // broadcast the sharedptbl pointer to all the other cores
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(i != BAMBOO_NUM_OF_CORE) {
      send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
    }
  }

  if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
    send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
  }
} // void transmappinginfo()
inline void flush(struct garbagelist * stackptr) {
  flushRuntimeObj(stackptr);

  while(true) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    bool hasItems = gc_moreItems_I();
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    if(!hasItems) {
      break;
    }

    GC_BAMBOO_DEBUGPRINT(0xe301);
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void * ptr = gc_dequeue_I();
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    if(ISSHAREDOBJ(ptr)) {
      // should be a local shared obj and should have mapping info
      ptr = flushObj(ptr);
      GC_BAMBOO_DEBUGPRINT(0xe302);
      GC_BAMBOO_DEBUGPRINT_REG(ptr);
      GC_BAMBOO_DEBUGPRINT_REG(tptr);
      GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
      if(ptr == NULL) {
        BAMBOO_EXIT(0xb004);
      }
    } // if(ISSHAREDOBJ(ptr))
    if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
      int type = ((int *)(ptr))[0];
      // scan all pointers in ptr
      unsigned INTPTR * pointer;
      pointer=pointerarray[type];
      GC_BAMBOO_DEBUGPRINT(0xe303);
      GC_BAMBOO_DEBUGPRINT_REG(pointer);
      if (pointer==0) {
        /* Array of primitives */
      } else if (((INTPTR)pointer)==1) {
        GC_BAMBOO_DEBUGPRINT(0xe304);
        /* Array of pointers */
        struct ArrayObject *ao=(struct ArrayObject *) ptr;
        int length=ao->___length___;
        int j;
        for(j=0; j<length; j++) {
          GC_BAMBOO_DEBUGPRINT(0xe305);
          void * objptr =
            ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
          GC_BAMBOO_DEBUGPRINT_REG(objptr);
          if(objptr != NULL) {
            void * dst = flushObj(objptr);
            if(dst != NULL) {
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
            }
          }
        }
      } else {
        GC_BAMBOO_DEBUGPRINT(0xe306);
        INTPTR size=pointer[0];
        int i;
        for(i=1; i<=size; i++) {
          GC_BAMBOO_DEBUGPRINT(0xe307);
          unsigned int offset=pointer[i];
          void * objptr=*((void **)(((char *)ptr)+offset));
          GC_BAMBOO_DEBUGPRINT_REG(objptr);
          if(objptr != NULL) {
            void * dst = flushObj(objptr);
            if(dst != NULL) {
              *((void **)(((char *)ptr)+offset)) = dst;
            }
          }
        } // for(i=1; i<=size; i++)
      } // if (pointer==0) else if (((INTPTR)pointer)==1) else ...
      // restore the mark field, indicating that this obj has been flushed
      if(ISSHAREDOBJ(ptr)) {
        ((int *)(ptr))[6] = INIT;
      }
    } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
  } // while(gc_moreItems())
  GC_BAMBOO_DEBUGPRINT(0xe308);

  // TODO bug here: the startup core holds all the large objs' info,
  // so all the large objs are flushed in sequence.
  while(gc_lobjmoreItems_I()) {
    GC_BAMBOO_DEBUGPRINT(0xe309);
    void * ptr = gc_lobjdequeue_I(NULL, NULL);
    ptr = flushObj(ptr);
    GC_BAMBOO_DEBUGPRINT(0xe30a);
    GC_BAMBOO_DEBUGPRINT_REG(ptr);
    GC_BAMBOO_DEBUGPRINT_REG(tptr);
    GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
    if(ptr == NULL) {
      BAMBOO_EXIT(0xb005);
    }
    if(((int *)(ptr))[6] == COMPACTED) {
      int type = ((int *)(ptr))[0];
      // scan all pointers in ptr
      unsigned INTPTR * pointer;
      pointer=pointerarray[type];
      GC_BAMBOO_DEBUGPRINT(0xe30b);
      GC_BAMBOO_DEBUGPRINT_REG(pointer);
      if (pointer==0) {
        /* Array of primitives */
      } else if (((INTPTR)pointer)==1) {
        GC_BAMBOO_DEBUGPRINT(0xe30c);
        /* Array of pointers */
        struct ArrayObject *ao=(struct ArrayObject *) ptr;
        int length=ao->___length___;
        int j;
        for(j=0; j<length; j++) {
          GC_BAMBOO_DEBUGPRINT(0xe30d);
          void * objptr =
            ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
          GC_BAMBOO_DEBUGPRINT_REG(objptr);
          if(objptr != NULL) {
            void * dst = flushObj(objptr);
            if(dst != NULL) {
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
            }
          }
        }
      } else {
        GC_BAMBOO_DEBUGPRINT(0xe30e);
        INTPTR size=pointer[0];
        int i;
        for(i=1; i<=size; i++) {
          GC_BAMBOO_DEBUGPRINT(0xe30f);
          unsigned int offset=pointer[i];
          void * objptr=*((void **)(((char *)ptr)+offset));
          GC_BAMBOO_DEBUGPRINT_REG(objptr);
          if(objptr != NULL) {
            void * dst = flushObj(objptr);
            if(dst != NULL) {
              *((void **)(((char *)ptr)+offset)) = dst;
            }
          }
        } // for(i=1; i<=size; i++)
      } // if (pointer==0) else if (((INTPTR)pointer)==1) else ...
      // restore the mark field, indicating that this obj has been flushed
      ((int *)(ptr))[6] = INIT;
    } // if(((int *)(ptr))[6] == COMPACTED)
  } // while(gc_lobjmoreItems())
  GC_BAMBOO_DEBUGPRINT(0xe310);

  // send flush finish message to core coordinator
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
  } else {
    send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
  }
  GC_BAMBOO_DEBUGPRINT(0xe311);
} // void flush(struct garbagelist * stackptr)
#ifdef GC_CACHE_ADAPT
// prepare for cache adaption:
//   -- flush the shared heap
//   -- clean the dtlb entries
//   -- change the cache strategy
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // clean the dtlb entries
  BAMBOO_CLEAN_DTLB();

  // change the cache strategy
  gccachestage = isgccachestage;
} // cacheAdapt_gc(bool isgccachestage)
// the master core decides how to adapt the cache strategy for the
// mutator according to the collected statistic data

// make all pages hash-for-home (h4h)
int cacheAdapt_policy_h4h(){
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    bamboo_cache_policy_t policy = {0};
    policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
    *tmp_p = page_index;
    tmp_p++;
    *tmp_p = policy.word;
    tmp_p++;
    numchanged++;
  }
  return numchanged;
} // int cacheAdapt_policy_h4h()
// make all pages local, as in the non-cache-adaptable gc local mode
int cacheAdapt_policy_local(){
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    bamboo_cache_policy_t policy = {0};
    int block = 0;
    BLOCKINDEX(page_sva, &block);
    int coren = gc_block2core[block%(NUMCORES4GC*2)];
    // cache the page locally on the core hosting this block
    // NOTE: (x,y) should be changed to (x+1, y+1)!!!
    policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
    policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
    policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
    *tmp_p = page_index;
    tmp_p++;
    *tmp_p = policy.word;
    tmp_p++;
    numchanged++;
  }
  return numchanged;
} // int cacheAdapt_policy_local()
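/* Sampling-table layout assumed by the policies below: the master
 * reads one unsigned int per (core, page) pair out of
 * gccachesamplingtbl_r; core i's row starts at byte offset
 * i*size_cachesamplingtbl_local_r, so the per-page walk advances the
 * pointer by that stride to visit the same page index in every core's
 * row. */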
int cacheAdapt_policy_hotest(){
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned int hotfreq = 0;

    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int freq = *local_tbl;
      local_tbl=(unsigned int *)(
        ((char *)local_tbl)+size_cachesamplingtbl_local_r);
      // check the frequency and decide if this page is hot for the core
      if(hotfreq < freq) {
        hotfreq = freq;
        hotestcore = i;
      }
    }
    // Decide the cache strategy for this page.
    // If we decide to adopt a new cache strategy, write it into
    // gccachepolicytbl; the memory reserved there is large enough to
    // hold the information.
    // Format: page index + cache policy (h4h / host core (x,y))
    if(hotfreq == 0) {
      // this page has not been accessed; do not change its cache policy
    } else {
      // locally cache the page on the hottest core
      // NOTE: (x,y) should be changed to (x+1, y+1)!!!
      policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
      policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
      policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
      *tmp_p = page_index;
      tmp_p++;
      *tmp_p = policy.word;
      tmp_p++;
      numchanged++;
    }
  }
  return numchanged;
} // int cacheAdapt_policy_hotest()
#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
// cache the page on the core that accesses it the most if that core
// accesses it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the
// total; otherwise, hash the page for home (h4h)
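/* Worked example of the dominate test (hypothetical counts): for a
 * page totalling 1000 sampled accesses of which the hottest core made
 * 450, the scaled threshold is 1000*50/100 = 500 (both sides are also
 * divided by BAMBOO_PAGE_SIZE below, which cancels out).  Since
 * 450 < 500 no single core dominates and the page is hashed for home;
 * at 600 accesses it would instead be cached locally on that core. */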
int cacheAdapt_policy_dominate(){
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;

    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int freq = *local_tbl;
      local_tbl=(unsigned int *)(
        ((char *)local_tbl)+size_cachesamplingtbl_local_r);
      totalfreq += freq;
      // check the frequency and decide if this page is hot for the core
      if(hotfreq < freq) {
        hotfreq = freq;
        hotestcore = i;
      }
    }
    // Decide the cache strategy for this page.
    // If we decide to adopt a new cache strategy, write it into
    // gccachepolicytbl.
    // Format: page index + cache policy word
    if(hotfreq == 0) {
      // this page has not been accessed; do not change its cache policy
    } else {
      totalfreq =
        (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      if(hotfreq < totalfreq) {
        // no core dominates the accesses: hash the page for home
        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      } else {
        // locally cache the page on the hottest core
        // NOTE: (x,y) should be changed to (x+1, y+1)!!!
        policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
        policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
        policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
      }
      *tmp_p = page_index;
      tmp_p++;
      *tmp_p = policy.word;
      tmp_p++;
      numchanged++;
    }
  }
  return numchanged;
} // int cacheAdapt_policy_dominate()
#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
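/* gc_quicksort() below sorts the per-core "heavy page" records in
 * descending order.  The records are packed three-to-a-row starting at
 * index 1 (index 0 holds the record count):
 *   [3k-2] address of the page's policy slot in gccachepolicytbl
 *   [3k-1] total accesses to the page
 *   [3k]   remote accesses to the page
 * The sort key for record k is array[k*3 - offset], so offset 0 sorts
 * by remote accesses and offset 1 sorts by total accesses. */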
void gc_quicksort(unsigned long long *array,
                  int left,
                  int right,
                  int offset) {
  int pivot = 0;
  int leftIdx = left;
  int rightIdx = right;
  if((right-left+1) >= 1) {
    pivot = (left+right)/2;
    while((leftIdx <= pivot) && (rightIdx >= pivot)) {
      unsigned long long pivotValue = array[pivot*3-offset];
      while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
        leftIdx++;
      }
      while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
        rightIdx--;
      }
      // swap [leftIdx] & [rightIdx]
      for(int k = 0; k < 3; k++) {
        unsigned long long tmp = array[3*rightIdx-k];
        array[3*rightIdx-k] = array[3*leftIdx-k];
        array[3*leftIdx-k] = tmp;
      }
      leftIdx++;
      rightIdx--;
      if((leftIdx-1) == pivot) {
        pivot = rightIdx = rightIdx + 1;
      } else if((leftIdx+1) == pivot) {
        pivot = leftIdx = leftIdx-1;
      }
    }
    gc_quicksort(array, left, pivot-1, offset);
    gc_quicksort(array, pivot+1, right, offset);
  }
  return;
} // void gc_quicksort(...)
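/* Bookkeeping shared by the overload and crowd policies below:
 *   workload[c]            page-normalized accesses attributed to core c
 *   total_workload         sum of workload[] over all cores
 *   core2heavypages[c][0]  number of records for core c
 *   core2heavypages[c][3k-2 .. 3k]
 *                          (policy-slot address, total accesses,
 *                           remote accesses) of the k-th page whose
 *                           hottest core is c
 * A core counts as overloaded when workload[c] exceeds
 * total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD, i.e. when it is
 * the hottest core for more than 1/10 of all page-normalized traffic. */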
// Cache every page on the core that accesses it the most.
// Check whether any core's pages total more accesses than threshold
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
// most remote accesses and hash them for home until we get below
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
int cacheAdapt_policy_overload(){
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  unsigned long long workload[NUMCORESACTIVE];
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,
         sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;

    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int freq = *local_tbl;
      local_tbl=(unsigned int *)(
        ((char *)local_tbl)+size_cachesamplingtbl_local_r);
      totalfreq += freq;
      // check the frequency and decide if this page is hot for the core
      if(hotfreq < freq) {
        hotfreq = freq;
        hotestcore = i;
      }
    }
    // Decide the cache strategy for this page.
    // If we decide to adopt a new cache strategy, write it into
    // gccachepolicytbl; the memory reserved there is large enough to
    // hold the information.
    // Format: page index + cache policy (h4h / host core (x,y))
    if(hotfreq == 0) {
      // this page has not been accessed; do not change its cache policy
    } else {
      totalfreq/=BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      // locally cache the page on the hottest core
      // NOTE: (x,y) should be changed to (x+1, y+1)!!!
      policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
      policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
      policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
      *tmp_p = page_index;
      tmp_p++;
      *tmp_p = policy.word;
      tmp_p++;
      numchanged++;
      workload[hotestcore] += totalfreq;
      total_workload += totalfreq;
      // insert into core2heavypages, to be sorted with gc_quicksort
      unsigned long long remoteaccess = totalfreq - hotfreq;
      int index = (int)core2heavypages[hotestcore][0];
      core2heavypages[hotestcore][3*index+3] = remoteaccess;
      core2heavypages[hotestcore][3*index+2] = totalfreq;
      core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
      core2heavypages[hotestcore][0]++;
    }
  }

  unsigned long long workload_threshold =
    total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    int j = 1;
    int index = (int)core2heavypages[i][0];
    if(workload[i] > workload_threshold) {
      // sort according to the remoteaccess
      gc_quicksort(&core2heavypages[i][0], 1, index, 0);
      while((workload[i] > workload_threshold) && (j<index*3)) {
        // hash-for-home the pages with the most remote accesses
        bamboo_cache_policy_t policy = {0};
        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
        *((int*)core2heavypages[i][j]) = policy.word;
        workload[i] -= core2heavypages[i][j+1];
        j += 3;
      }
    }
  }
  return numchanged;
} // int cacheAdapt_policy_overload()
#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
#define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
// Cache every page on the core that accesses it the most.
// Check whether any core's pages total more accesses than threshold
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
// most remote accesses and hash them for home until we get below
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
// Sort pages based on activity....
// If more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
// core's pages come from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
// then hash-for-home those pages (selecting the ones with the most
// remote accesses first or the fewest local accesses) until we get
// below GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
int cacheAdapt_policy_crowd(){
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  unsigned long long workload[NUMCORESACTIVE];
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,
         sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;

    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int freq = *local_tbl;
      local_tbl=(unsigned int *)(
        ((char *)local_tbl)+size_cachesamplingtbl_local_r);
      totalfreq += freq;
      // check the frequency and decide if this page is hot for the core
      if(hotfreq < freq) {
        hotfreq = freq;
        hotestcore = i;
      }
    }
    // Decide the cache strategy for this page.
    // If we decide to adopt a new cache strategy, write it into
    // gccachepolicytbl; the memory reserved there is large enough to
    // hold the information.
    // Format: page index + cache policy (h4h / host core (x,y))
    if(hotfreq == 0) {
      // this page has not been accessed; do not change its cache policy
    } else {
      totalfreq/=BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      // locally cache the page on the hottest core
      // NOTE: (x,y) should be changed to (x+1, y+1)!!!
      policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
      policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
      policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
      *tmp_p = page_index;
      tmp_p++;
      *tmp_p = policy.word;
      tmp_p++;
      numchanged++;
      workload[hotestcore] += totalfreq;
      total_workload += totalfreq;
      // insert into core2heavypages, to be sorted with gc_quicksort
      unsigned long long remoteaccess = totalfreq - hotfreq;
      int index = (int)core2heavypages[hotestcore][0];
      core2heavypages[hotestcore][3*index+3] = remoteaccess;
      core2heavypages[hotestcore][3*index+2] = totalfreq;
      core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
      core2heavypages[hotestcore][0]++;
    }
  }

  unsigned long long workload_threshold =
    total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    int j = 1;
    int index = (int)core2heavypages[i][0];
    if(workload[i] > workload_threshold) {
      // sort according to the remoteaccess
      gc_quicksort(&core2heavypages[i][0], 1, index, 0);
      while((workload[i] > workload_threshold) && (j<index*3)) {
        // hash-for-home the pages with the most remote accesses
        bamboo_cache_policy_t policy = {0};
        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
        *((int*)core2heavypages[i][j]) = policy.word;
        workload[i] -= core2heavypages[i][j+1];
        j += 3;
      }

      // Check if the accesses are crowded onto few pages:
      // sort the remaining pages according to the total access
      gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
      unsigned long long threshold =
        GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
      int num_crowded = 0;
      unsigned long long t_workload = 0;
      do {
        t_workload += core2heavypages[i][j+num_crowded*3+1];
        num_crowded++;
      } while(t_workload < threshold);
      // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and, if there are
      // enough items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
      if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
        // need to hash-for-home these pages:
        // sort them according to the remote access
        gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
        // h4h those pages with more remote accesses
        bamboo_cache_policy_t policy = {0};
        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
        *((int*)core2heavypages[i][j]) = policy.word;
        workload[i] -= core2heavypages[i][j+1];
        t_workload -= core2heavypages[i][j+1];
        j += 3;
        threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
      }
    }
  }
  return numchanged;
} // int cacheAdapt_policy_crowd()
void cacheAdapt_master() {
#ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
  gc_output_cache_sampling_r();
#endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
  int numchanged = 0;
  // check the statistic data and decide the new cache strategy
  // for each page
#ifdef GC_CACHE_ADAPT_POLICY1
  numchanged = cacheAdapt_policy_h4h();
#elif defined GC_CACHE_ADAPT_POLICY2
  numchanged = cacheAdapt_policy_local();
#elif defined GC_CACHE_ADAPT_POLICY3
  numchanged = cacheAdapt_policy_hotest();
#elif defined GC_CACHE_ADAPT_POLICY4
  numchanged = cacheAdapt_policy_dominate();
#elif defined GC_CACHE_ADAPT_POLICY5
  numchanged = cacheAdapt_policy_overload();
#elif defined GC_CACHE_ADAPT_POLICY6
  numchanged = cacheAdapt_policy_crowd();
#endif
  *gccachepolicytbl = numchanged;
} // void cacheAdapt_master()
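/* gccachepolicytbl wire format, produced by the policy functions above
 * and consumed by cacheAdapt_mutator() below: word 0 is the number of
 * (page, policy) pairs, followed by that many pairs of
 *   { page_index, bamboo_cache_policy_t.word }. */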
// adapt the cache strategy for the mutator
void cacheAdapt_mutator() {
  int numchanged = *gccachepolicytbl;
  // check the changes and adapt them
  int * tmp_p = gccachepolicytbl+1;
  while(numchanged--) {
    // read out the policy
    int page_index = *tmp_p;
    bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
    // adapt the policy
    bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
                              policy, BAMBOO_PAGE_SIZE);
    tmp_p += 2;
  }
} // void cacheAdapt_mutator()
void gc_output_cache_sampling() {
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    int block = 0;
    BLOCKINDEX(page_sva, &block);
    int coren = gc_block2core[block%(NUMCORES4GC*2)];
    tprintf("va: %x page_index: %d host: %d\n",
            (int)page_sva, page_index, coren);
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl
                                 +size_cachesamplingtbl_local*i);
      unsigned int freq = local_tbl[page_index];
      printf("%8d ",freq);
    }
    printf("\n");
  }
  printf("=================\n");
} // gc_output_cache_sampling
void gc_output_cache_sampling_r() {
  unsigned int page_index = 0;
  int page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    int block = 0;
    BLOCKINDEX(page_sva, &block);
    int coren = gc_block2core[block%(NUMCORES4GC*2)];
    tprintf("va: %x page_index: %d host: %d\n",
            (int)page_sva, page_index, coren);
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl_r
                                 +size_cachesamplingtbl_local_r*i);
      unsigned int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
      printf("%8d ",freq);
    }
    printf("\n");
  }
  printf("=================\n");
} // gc_output_cache_sampling_r
#endif // GC_CACHE_ADAPT
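/* Collector phase machine: gc cores run
 *   INITPHASE -> MARKPHASE -> compact -> MAPPHASE -> FLUSHPHASE
 *   [-> PREFINISHPHASE with GC_CACHE_ADAPT] -> FINISHPHASE,
 * spinning on gcphase between phases; the master advances gcphase and
 * every core reports completion with the matching GCFINISH* message.
 * Non-gc cores (gc_nocollect) skip the compact and map phases. */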
inline void gc_collect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready
  // to do gc
  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
             self_numreceiveobjs, false);

  // core collector routine
  while(true) {
    if(INITPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
#endif
  initGC();
#ifdef GC_CACHE_ADAPT
  // prepare for cache adaption
  cacheAdapt_gc(true);
#endif // GC_CACHE_ADAPT
  // send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);

  while(true) {
    if(MARKPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  mark(true, stackptr);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish mark phase, start compact phase\n",
         udn_tile_coord_x(), udn_tile_coord_y());
#endif
  compact();
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif

  while(true) {
    if(MAPPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  transmappinginfo();
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif

  while(true) {
    if(FLUSHPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
#ifdef GC_PROFILE
  // send the num of obj/liveobj/forwardobj to the startupcore
  if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
    send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
               gc_num_liveobj, gc_num_forwardobj, false);
  }
#endif // GC_PROFILE
  flush(stackptr);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif

#ifdef GC_CACHE_ADAPT
  while(true) {
    if(PREFINISHPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  // send prefinish finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
#ifdef GC_CACHE_SAMPLING
  // reset the sampling arrays
  bamboo_dtlb_sampling_reset();
#endif // GC_CACHE_SAMPLING
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbls
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
                     size_cachesamplingtbl_local_r);
  }
#endif // GC_CACHE_ADAPT

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  while(true) {
    if(FINISHPHASE == gcphase) {
      break;
    }
  }
  gcprocessing = false;
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
#endif
} // void gc_collect(struct garbagelist * stackptr)
inline void gc_nocollect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready
  // to do gc
  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
             self_numreceiveobjs, false);

  while(true) {
    if(INITPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
#endif
  initGC();
#ifdef GC_CACHE_ADAPT
  // prepare for cache adaption
  cacheAdapt_gc(true);
#endif // GC_CACHE_ADAPT
  // send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);

  while(true) {
    if(MARKPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  mark(true, stackptr);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish mark phase, wait for flush\n",
         udn_tile_coord_x(), udn_tile_coord_y());
#endif

  // non-gc core collector routine
  while(true) {
    if(FLUSHPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
#ifdef GC_PROFILE
  if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
    send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
               gc_num_liveobj, gc_num_forwardobj, false);
  }
#endif // GC_PROFILE
  flush(stackptr);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif

#ifdef GC_CACHE_ADAPT
  while(true) {
    if(PREFINISHPHASE == gcphase) {
      break;
    }
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  // send prefinish finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
#ifdef GC_CACHE_SAMPLING
  // reset the sampling arrays
  bamboo_dtlb_sampling_reset();
#endif // GC_CACHE_SAMPLING
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbls
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
                     size_cachesamplingtbl_local_r);
  }
#endif // GC_CACHE_ADAPT

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  while(true) {
    if(FINISHPHASE == gcphase) {
      break;
    }
  }
  gcprocessing = false;
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
#endif
} // void gc_nocollect(struct garbagelist * stackptr)
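/* gc_master() below drives the phases from the startup core: it
 * broadcasts the GCSTART* message for each phase, spins until
 * gc_checkCoreStatus_I() (or the ALL variant) reports every core
 * stalled, and in the compact phase additionally brokers memory: it
 * answers GCFINISHCOMPACT progress reports by resolving pending move
 * requests and assigning destinations via
 * gcmovestartaddr/gcdstcore/gcblock2fill. */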
inline void gc_master(struct garbagelist * stackptr) {
  gcphase = INITPHASE;
  int i = 0;
  waitconfirm = false;
  numconfirm = 0;
  // init phase
  // Note: all cores need to init gc, including non-gc cores
  for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
    // send GC init messages to all cores
    send_msg_1(i, GCSTARTINIT, false);
  }
  bool isfirst = true;
  bool allStall = false;

#ifdef GC_CACHE_ADAPT
  // prepare for cache adaption
  cacheAdapt_gc(true);
#endif // GC_CACHE_ADAPT

#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
  while(true) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(gc_checkAllCoreStatus_I()) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      break;
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  }
#ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
  gc_output_cache_sampling();
#endif // GC_CACHE_ADAPT_POLICY_OUTPUT
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  // all cores have finished the init phase;
  // restore the gcstatus of all cores
  // Note: all cores have to do mark, including non-gc cores
  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
  for(i = 1; i < NUMCORESACTIVE; ++i) {
    gccorestatus[i] = 1;
    // send GC start messages to all cores
    send_msg_1(i, GCSTART, false);
  }
  gcphase = MARKPHASE;
  // mark phase
  while(MARKPHASE == gcphase) {
    mark(isfirst, stackptr);
    if(isfirst) {
      isfirst = false;
    }
  } // while(MARKPHASE == gcphase)

  // send msgs to all cores requesting large objs info
  // Note: only need to ask gc cores; non-gc cores do not host any objs
  numconfirm = NUMCORES4GC - 1;
  for(i = 1; i < NUMCORES4GC; ++i) {
    send_msg_1(i, GCLOBJREQUEST, false);
  }
  gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
  while(true) {
    if(numconfirm == 0) {
      break;
    }
  } // wait for responses
  // check the heaptop
  if(gcheaptop < gcmarkedptrbound) {
    gcheaptop = gcmarkedptrbound;
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  // cache all large objs
  if(!cacheLObjs()) {
    // not enough space to cache the large objs
    BAMBOO_EXIT(0xb006);
  }
  // predict the number of blocks to fill for each core
  int tmpheaptop = 0;
  int numpbc = loadbalance(&tmpheaptop);
  // TODO
  numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  //int tmptopptr = 0;
  //BASEPTR(gctopcore, 0, &tmptopptr);
  //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  GC_BAMBOO_DEBUGPRINT(0xabab);
  GC_BAMBOO_DEBUGPRINT_REG(tmptopptr);
  for(i = 0; i < NUMCORES4GC; ++i) {
    int tmpcoreptr = 0;
    BASEPTR(i, numpbc, &tmpcoreptr);
    // send start compact messages to all cores
    // TODO bug here: do not know if the direction is positive or negative?
    if (tmpcoreptr < tmpheaptop) {
      gcstopblock[i] = numpbc + 1;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
      } else {
        gcblock2fill = numpbc+1;
      } // if(i != STARTUPCORE)
    } else {
      gcstopblock[i] = numpbc;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
      } else {
        gcblock2fill = numpbc;
      } // if(i != STARTUPCORE)
    }
    GC_BAMBOO_DEBUGPRINT(0xf000+i);
    GC_BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
    GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
    // init some data structures for the compact phase
    gcfilledblocks[i] = 0;
    gcrequiredmems[i] = 0;
  } // for(i = 0; i < NUMCORES4GC; ++i)

  gcmovepending = 0;
  gctomove = false;
  gcphase = COMPACTPHASE;

  // compact phase
  bool finalcompact = false;
  // initialize pointers for compacting
  struct moveHelper * orig =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  initOrig_Dst(orig, to);
  int filledblocks = 0;
  INTPTR heaptopptr = 0;
  bool finishcompact = false;
  bool iscontinue = true;
  bool localcompact = true;
  while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
    if((!finishcompact) && iscontinue) {
      GC_BAMBOO_DEBUGPRINT(0xe001);
      GC_BAMBOO_DEBUGPRINT_REG(numpbc);
      GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
      finishcompact = compacthelper(orig, to, &filledblocks,
                                    &heaptopptr, &localcompact);
      GC_BAMBOO_DEBUGPRINT(0xe002);
      GC_BAMBOO_DEBUGPRINT_REG(finishcompact);
      GC_BAMBOO_DEBUGPRINT_REG(gctomove);
      GC_BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
      GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
      GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
    }

    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(gc_checkCoreStatus_I()) {
      // all cores have finished compacting;
      // restore the gcstatus of all cores
      for(i = 0; i < NUMCORES4GC; ++i) {
        gccorestatus[i] = 1;
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      break;
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      // check if there is spare mem for pending move requests
      if(COMPACTPHASE == gcphase) {
        GC_BAMBOO_DEBUGPRINT(0xe003);
        resolvePendingMoveRequest();
        GC_BAMBOO_DEBUGPRINT_REG(gctomove);
      } else {
        GC_BAMBOO_DEBUGPRINT(0xe004);
        compact2Heaptop();
      }
    } // if(gc_checkCoreStatus_I()) else ...

    if(gctomove) {
      GC_BAMBOO_DEBUGPRINT(0xe005);
      GC_BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
      GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
      GC_BAMBOO_DEBUGPRINT_REG(gctomove);
      to->ptr = gcmovestartaddr;
      to->numblocks = gcblock2fill - 1;
      to->bound = (to->numblocks==0) ?
                  BAMBOO_SMEM_SIZE_L :
                  BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
      BASEPTR(gcdstcore, to->numblocks, &(to->base));
      to->offset = to->ptr - to->base;
      to->top = (to->numblocks==0) ?
                (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
      to->offset = BAMBOO_CACHE_LINE_SIZE;
      to->ptr += to->offset; // for header
      to->top += to->offset;
      if(gcdstcore == BAMBOO_NUM_OF_CORE) {
        localcompact = true;
      } else {
        localcompact = false;
      }
      gctomove = false;
      iscontinue = true;
    } else if(!finishcompact) {
      // still compacting but no new destination yet
      iscontinue = false;
    }
  } // while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase))

#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  // move the cached large objs to the top of the heap
  moveLObjs();
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  RUNFREE(orig);
  RUNFREE(to);

  // map phase
  gcphase = MAPPHASE;
  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
  // Note: only gc cores host objs, so only they transfer mapping info
  for(i = 1; i < NUMCORES4GC; ++i) {
    // send start mapinfo messages to all gc cores
    gccorestatus[i] = 1;
    send_msg_1(i, GCSTARTMAPINFO, false);
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  transmappinginfo();
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
  while(MAPPHASE == gcphase) {
    // check the status of all cores
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(gc_checkCoreStatus_I()) {
      // all cores have finished sending mapping info
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      break;
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // while(MAPPHASE == gcphase)

  // flush phase
  gcphase = FLUSHPHASE;
  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
  // Note: all cores should flush their runtime data, including non-gc
  // cores
  for(i = 1; i < NUMCORESACTIVE; ++i) {
    // send start flush messages to all cores
    gccorestatus[i] = 1;
    send_msg_1(i, GCSTARTFLUSH, false);
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  flush(stackptr);

#ifdef GC_CACHE_ADAPT
  // now the master core needs to decide the new cache strategy
  cacheAdapt_master();
#endif // GC_CACHE_ADAPT

  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
  while(FLUSHPHASE == gcphase) {
    // check the status of all cores
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(gc_checkAllCoreStatus_I()) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      break;
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // while(FLUSHPHASE == gcphase)
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif

#ifdef GC_CACHE_ADAPT
  // prefinish phase
  gcphase = PREFINISHPHASE;
  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
  // Note: all cores should do the cache adaption, including non-gc
  // cores
  for(i = 1; i < NUMCORESACTIVE; ++i) {
    // send start prefinish messages to all cores
    gccorestatus[i] = 1;
    send_msg_1(i, GCSTARTPREF, false);
  }
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
  // cache adapt phase
  cacheAdapt_mutator();
#ifdef GC_CACHE_ADAPT_OUTPUT
  bamboo_output_cache_policy();
#endif
  cacheAdapt_gc(false);

  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
  while(PREFINISHPHASE == gcphase) {
    // check the status of all cores
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(gc_checkAllCoreStatus_I()) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      break;
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // while(PREFINISHPHASE == gcphase)

#ifdef GC_CACHE_SAMPLING
  // reset the sampling arrays
  bamboo_dtlb_sampling_reset();
#endif // GC_CACHE_SAMPLING
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbls
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
                     size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  }
#endif // GC_CACHE_ADAPT

  gcphase = FINISHPHASE;

  // invalidate all shared mem pointers;
  // put it here as it takes time to inform all the other cores to
  // finish gc, and it might cause problems when some core resumes the
  // mutator earlier than the other cores
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
  for(i = 1; i < NUMCORESACTIVE; ++i) {
    // send gc finish messages to all cores
    send_msg_1(i, GCFINISH, false);
    gccorestatus[i] = 1;
  }

  gcprocessing = false;
#ifdef RAWPATH // TODO GC_DEBUG
  printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
         udn_tile_coord_y());
#endif
} // void gc_master(struct garbagelist * stackptr)
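/* Before starting a collection the master checks that, summed over all
 * cores, the number of objects sent equals the number received:
 *   sum_i gcnumsendobjs[0][i] - sum_i gcnumreceiveobjs[0][i] == 0.
 * A nonzero difference means some transferred object is still in
 * flight (e.g. 5 sends against 4 receives leaves one undelivered
 * object that the collection would miss), so the gc is postponed. */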
inline bool gc(struct garbagelist * stackptr) {
  // check if we are ready to do gc
  if(!gcflag) {
    gcprocessing = false;
    return false;
  }

#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
  // disable the timer interrupt
  bamboo_mask_timer_intr();
#endif // GC_CACHE_SAMPLING
#endif // GC_CACHE_ADAPT

  // core coordinator routine
  if(0 == BAMBOO_NUM_OF_CORE) {
#ifdef GC_DEBUG
    printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
           udn_tile_coord_y());
#endif
    bool isallstall = true;
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    int ti = 0;
    for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
      if(gccorestatus[ti] != 0) {
        isallstall = false;
        break;
      }
    }
    if(!isallstall) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      // some of the cores are still executing the mutator and have not
      // reached a gc safe point, so gc is not ready to start yet;
      // in case some pregc information msgs got lost, send a confirm
      // msg to the 'busy' core
      send_msg_1(ti, GCSTARTPRE, false);
      gcprocessing = false;
      return false;
    }

    gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
    gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
    int sumsendobj = 0;
    GC_BAMBOO_DEBUGPRINT(0xec04);
    for(int i = 0; i < NUMCORESACTIVE; ++i) {
      sumsendobj += gcnumsendobjs[0][i];
      GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
    } // for(i = 0; i < NUMCORESACTIVE; ++i)
    GC_BAMBOO_DEBUGPRINT(0xec05);
    GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
    for(int i = 0; i < NUMCORESACTIVE; ++i) {
      sumsendobj -= gcnumreceiveobjs[0][i];
      GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
    } // for(i = 0; i < NUMCORESACTIVE; ++i)
    GC_BAMBOO_DEBUGPRINT(0xec06);
    GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
    if(0 != sumsendobj) {
      // there are still some msgs in flight; wait until the updated
      // pregc information arrives and check again
      gcprocessing = false;
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      return false;
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

#ifdef RAWPATH // TODO GC_DEBUG
    printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
#endif
    // Zero out the remaining bamboo_cur_msp.
    // Only zero out the first 4 bytes of the remaining memory.
    // The operation is moved here because, for the GC_CACHE_ADAPT
    // version, we need to make sure the shared heap is not touched
    // during the gcinit phase; otherwise there would be problems when
    // adapting the cache strategy.
    if((bamboo_cur_msp != 0)
       && (bamboo_smem_zero_top == bamboo_cur_msp)
       && (bamboo_smem_size > 0)) {
      *((int *)bamboo_cur_msp) = 0;
    }
#ifdef GC_FLUSH_DTLB
    if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
      BAMBOO_CLEAN_DTLB();
      gc_num_flush_dtlb++;
    }
#endif
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
    // get the sampling data
    bamboo_output_dtlb_sampling();
#endif // GC_CACHE_SAMPLING
#endif // GC_CACHE_ADAPT
    gcprocessing = true;
    gc_master(stackptr);
  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
    // Zero out the remaining bamboo_cur_msp.
    // Only zero out the first 4 bytes of the remaining memory.
    // The operation is moved here because, for the GC_CACHE_ADAPT
    // version, we need to make sure the shared heap is not touched
    // during the gcinit phase; otherwise there would be problems when
    // adapting the cache strategy.
    if((bamboo_cur_msp != 0)
       && (bamboo_smem_zero_top == bamboo_cur_msp)
       && (bamboo_smem_size > 0)) {
      *((int *)bamboo_cur_msp) = 0;
    }
#ifdef GC_FLUSH_DTLB
    if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
      BAMBOO_CLEAN_DTLB();
      gc_num_flush_dtlb++;
    }
#endif
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
    if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
      // get the sampling data
      bamboo_output_dtlb_sampling();
    }
#endif // GC_CACHE_SAMPLING
#endif // GC_CACHE_ADAPT
    gcprocessing = true;
    gc_collect(stackptr);
  } else {
    // Zero out the remaining bamboo_cur_msp.
    // Only zero out the first 4 bytes of the remaining memory.
    // The operation is moved here because, for the GC_CACHE_ADAPT
    // version, we need to make sure the shared heap is not touched
    // during the gcinit phase; otherwise there would be problems when
    // adapting the cache strategy.
    if((bamboo_cur_msp != 0)
       && (bamboo_smem_zero_top == bamboo_cur_msp)
       && (bamboo_smem_size > 0)) {
      *((int *)bamboo_cur_msp) = 0;
    }
#ifdef GC_FLUSH_DTLB
    if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
      BAMBOO_CLEAN_DTLB();
      gc_num_flush_dtlb++;
    }
#endif
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
    if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
      // get the sampling data
      bamboo_output_dtlb_sampling();
    }
#endif // GC_CACHE_SAMPLING
#endif // GC_CACHE_ADAPT
    // not a gc core; should wait for the gcfinish msg
    gcprocessing = true;
    gc_nocollect(stackptr);
  }
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
  // enable the timer interrupt
  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
  bamboo_unmask_timer_intr();
#endif // GC_CACHE_SAMPLING
#endif // GC_CACHE_ADAPT

  return true;
} // bool gc(struct garbagelist * stackptr)
#ifdef GC_PROFILE
inline void gc_profileStart(void) {
  if(!gc_infoOverflow) {
    GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
    gc_infoArray[gc_infoIndex] = gcInfo;
    gcInfo->index = 1;
    gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
  }
}

inline void gc_profileItem(void) {
  if(!gc_infoOverflow) {
    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
  }
}

inline void gc_profileEnd(void) {
  if(!gc_infoOverflow) {
    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
    gcInfo->time[gcInfo->index++] = gc_num_livespace;
    gcInfo->time[gcInfo->index++] = gc_num_freespace;
    gcInfo->time[gcInfo->index++] = gc_num_lobj;
    gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
    gcInfo->time[gcInfo->index++] = gc_num_obj;
    gcInfo->time[gcInfo->index++] = gc_num_liveobj;
    gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
    gc_infoIndex++;
    if(gc_infoIndex == GCINFOLENGTH) {
      gc_infoOverflow = true;
      //taskInfoIndex = 0;
    }
  }
}
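/* Each GCInfo record is laid out as phase timestamps followed by the
 * seven counters appended by gc_profileEnd(): livespace, freespace,
 * lobj, lobjspace, obj, liveobj, forwardobj.  gc_outputProfileData()
 * below therefore treats time[0 .. index-8] as timestamps and
 * time[index-7 .. index-1] as the counters. */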
// output the profiling data
void gc_outputProfileData() {
  int i;
  int j;
  unsigned long long totalgc = 0;

#ifndef BAMBOO_MEMPROF
  BAMBOO_PRINT(0xdddd);
#endif
  // output the gc-related info
  for(i= 0; i < gc_infoIndex; i++) {
    GCInfo * gcInfo = gc_infoArray[i];
#ifdef BAMBOO_MEMPROF
    unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
#else
    unsigned long long tmp = 0;
    BAMBOO_PRINT(0xddda);
    for(j = 0; j < gcInfo->index - 7; j++) {
      BAMBOO_PRINT(gcInfo->time[j]);
      BAMBOO_PRINT(gcInfo->time[j]-tmp);
      BAMBOO_PRINT(0xdddb);
      tmp = gcInfo->time[j];
    }
    tmp = (tmp-gcInfo->time[0]);
    BAMBOO_PRINT_REG(tmp);
    BAMBOO_PRINT(0xdddc);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
    BAMBOO_PRINT(0xddde);
#endif
    totalgc += tmp;
  }
#ifndef BAMBOO_MEMPROF
  BAMBOO_PRINT(0xdddf);
#endif
  BAMBOO_PRINT_REG(totalgc);

  if(gc_infoOverflow) {
    BAMBOO_PRINT(0xefee);
  }

#ifndef BAMBOO_MEMPROF
  BAMBOO_PRINT(0xeeee);
#endif
} // void gc_outputProfileData()
#endif // #ifdef GC_PROFILE