*** empty log message ***
[IRC.git] / Robust / src / Runtime / multicoregarbage.c
1 #ifdef MULTICORE_GC
2 #include "runtime.h"
3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9
10 extern int corenum;
11 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
12 extern int numqueues[][NUMCLASSES];
13
14 extern struct genhashtable * activetasks;
15 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
16 extern struct taskparamdescriptor *currtpd;
17
18 extern struct LockValue runtime_locks[MAXTASKPARAMS];
19 extern int runtime_locklen;
20
21 struct pointerblock {
22   void * ptrs[NUMPTRS];
23   struct pointerblock *next;
24 };
25
26 struct pointerblock *gchead=NULL;
27 int gcheadindex=0;
28 struct pointerblock *gctail=NULL;
29 int gctailindex=0;
30 struct pointerblock *gctail2=NULL;
31 int gctailindex2=0;
32 struct pointerblock *gcspare=NULL;
33
34 #define NUMLOBJPTRS 20
35
36 struct lobjpointerblock {
37   void * lobjs[NUMLOBJPTRS];
38         //void * dsts[NUMLOBJPTRS];
39         int lengths[NUMLOBJPTRS];
40         //void * origs[NUMLOBJPTRS];
41         int hosts[NUMLOBJPTRS];
42   struct lobjpointerblock *next;
43         struct lobjpointerblock *prev;
44 };
45
46 struct lobjpointerblock *gclobjhead=NULL;
47 int gclobjheadindex=0;
48 struct lobjpointerblock *gclobjtail=NULL;
49 int gclobjtailindex=0;
50 struct lobjpointerblock *gclobjtail2=NULL;
51 int gclobjtailindex2=0;
52 struct lobjpointerblock *gclobjspare=NULL;
53
54 #ifdef GC_DEBUG
55 // dump whole mem in blocks
56 inline void dumpSMem() {
57         int block = 0;
58         int sblock = 0;
59         int j = 0;
60         int i = 0;
61         int coren = 0;
62         int x = 0;
63         int y = 0;
64         tprintf("Dump shared mem: \n");
65         // reserved blocks for sblocktbl
66         tprintf("++++ reserved sblocks ++++ \n");
67         for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
68                 tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
69             *((int *)(i)), *((int *)(i + 4)), 
70                                                 *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
71                                                 *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
72                                                 *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
73                                                 *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
74                                                 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
75                                                 *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
76                                                 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
77         }
78         sblock = gcreservedsb;
79         bool advanceblock = false;
80         // remaining memory
81         for(i=gcbaseva;i<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE;i+=4*16){
82                 advanceblock = false;
83                 // computing sblock # and block #, core coordinate (x,y) also
84                 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
85                         // finished a sblock
86                         if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
87                                 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
88                                         // finished a block
89                                         block++;
90                                         advanceblock = true;
91                                 }
92                         } else {
93                                 // finished a block
94                                 block++;
95                                 advanceblock = true;
96                         }
97                         // compute core #
98                         if(advanceblock) {
99                                 coren = gc_block2core[block%(NUMCORES4GC*2)];
100                         }
101                         // compute core coordinate
102                         /*int tmpcore = coren;
103                         if((NUMCORES4GC==62) && (tmpcore > 5)) {
104                                 tmpcore+=2;
105                         }
106                         x = tmpcore/bamboo_width;
107                         y = tmpcore%bamboo_width;*/
108                         x = bamboo_cpu2coords[coren*2]; 
109                         y = bamboo_cpu2coords[coren*2+1];
110                         tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", 
111                                             block, sblock++, x, y, 
112                                                         (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
113                 }
114                 j++;
115     tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
116             *((int *)(i)), *((int *)(i + 4)), 
117                                                 *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
118                                                 *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
119                                                 *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
120                                                 *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
121                                                 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
122                                                 *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
123                                                 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
124         }
125         tprintf("\n");
126 }
127 #endif
128
129 // should be invoked with interruption closed
130 inline void gc_enqueue_I(void *ptr) {
131 #ifdef DEBUG
132         BAMBOO_DEBUGPRINT(0xe601);
133         BAMBOO_DEBUGPRINT_REG(ptr);
134 #endif
135   if (gcheadindex==NUMPTRS) {
136     struct pointerblock * tmp;
137     if (gcspare!=NULL) {
138       tmp=gcspare;
139       gcspare=NULL;
140     } else {
141       tmp=RUNMALLOC_I(sizeof(struct pointerblock));
142                 } // if (gcspare!=NULL)
143     gchead->next=tmp;
144     gchead=tmp;
145     gcheadindex=0;
146   } // if (gcheadindex==NUMPTRS)
147   gchead->ptrs[gcheadindex++]=ptr;
148 #ifdef DEBUG
149         BAMBOO_DEBUGPRINT(0xe602);
150 #endif
151 } // void gc_enqueue_I(void *ptr)
152
153 // dequeue and destroy the queue
154 inline void * gc_dequeue() {
155   if (gctailindex==NUMPTRS) {
156     struct pointerblock *tmp=gctail;
157     gctail=gctail->next;
158     gctailindex=0;
159     if (gcspare!=NULL) {
160       RUNFREE(tmp);
161                 } else {
162       gcspare=tmp;
163                 } // if (gcspare!=NULL)
164   } // if (gctailindex==NUMPTRS)
165   return gctail->ptrs[gctailindex++];
166 } // void * gc_dequeue()
167
168 // dequeue and do not destroy the queue
169 inline void * gc_dequeue2() {
170         if (gctailindex2==NUMPTRS) {
171     struct pointerblock *tmp=gctail2;
172     gctail2=gctail2->next;
173     gctailindex2=0;
174   } // if (gctailindex2==NUMPTRS)
175   return gctail2->ptrs[gctailindex2++];
176 } // void * gc_dequeue2() 
177
178 inline int gc_moreItems() {
179   if ((gchead==gctail)&&(gctailindex==gcheadindex))
180     return 0;
181   return 1;
182 } // int gc_moreItems() 
183
184 inline int gc_moreItems2() {
185   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
186     return 0;
187   return 1;
188 } // int gc_moreItems2()
189
190 // should be invoked with interruption closed
191 // enqueue a large obj: start addr & length
192 inline void gc_lobjenqueue_I(void *ptr, 
193                                          int length, 
194                                                                                          int host) {
195 #ifdef DEBUG
196         BAMBOO_DEBUGPRINT(0xe901);
197 #endif
198   if (gclobjheadindex==NUMLOBJPTRS) {
199     struct lobjpointerblock * tmp;
200     if (gclobjspare!=NULL) {
201       tmp=gclobjspare;
202       gclobjspare=NULL;
203     } else {
204       tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
205                 } // if (gclobjspare!=NULL)
206     gclobjhead->next=tmp;
207                 tmp->prev = gclobjhead;
208     gclobjhead=tmp;
209     gclobjheadindex=0;
210   } // if (gclobjheadindex==NUMLOBJPTRS)
211   gclobjhead->lobjs[gclobjheadindex]=ptr;
212         gclobjhead->lengths[gclobjheadindex]=length;
213         gclobjhead->hosts[gclobjheadindex++]=host;
214 #ifdef DEBUG
215         BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
216         BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
217         BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
218 #endif
219 } // void gc_lobjenqueue_I(void *ptr...)
220
221 // dequeue and destroy the queue
222 inline void * gc_lobjdequeue(int * length,
223                                          int * host) {
224   if (gclobjtailindex==NUMLOBJPTRS) {
225     struct lobjpointerblock *tmp=gclobjtail;
226     gclobjtail=gclobjtail->next;
227     gclobjtailindex=0;
228                 gclobjtail->prev = NULL;
229     if (gclobjspare!=NULL) {
230       RUNFREE(tmp);
231                 } else {
232       gclobjspare=tmp;
233                         tmp->next = NULL;
234                         tmp->prev = NULL;
235                 } // if (gclobjspare!=NULL)
236   } // if (gclobjtailindex==NUMLOBJPTRS)
237         if(length != NULL) {
238                 *length = gclobjtail->lengths[gclobjtailindex];
239         }
240         if(host != NULL) {
241                 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
242         }
243   return gclobjtail->lobjs[gclobjtailindex++];
244 } // void * gc_lobjdequeue()
245
246 inline int gc_lobjmoreItems() {
247   if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
248     return 0;
249   return 1;
250 } // int gc_lobjmoreItems()
251
252 // dequeue and don't destroy the queue
253 inline void gc_lobjdequeue2() {
254   if (gclobjtailindex2==NUMLOBJPTRS) {
255     gclobjtail2=gclobjtail2->next;
256     gclobjtailindex2=1;
257   } else {
258                 gclobjtailindex2++;
259         }// if (gclobjtailindex2==NUMLOBJPTRS)
260 } // void * gc_lobjdequeue2()
261
262 inline int gc_lobjmoreItems2() {
263   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
264     return 0;
265   return 1;
266 } // int gc_lobjmoreItems2()
267
268 // 'reversly' dequeue and don't destroy the queue
269 inline void gc_lobjdequeue3() {
270   if (gclobjtailindex2==0) {
271     gclobjtail2=gclobjtail2->prev;
272     gclobjtailindex2=NUMLOBJPTRS-1;
273   } else {
274                 gclobjtailindex2--;
275         }// if (gclobjtailindex2==NUMLOBJPTRS)
276 } // void * gc_lobjdequeue3()
277
278 inline int gc_lobjmoreItems3() {
279   if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
280     return 0;
281   return 1;
282 } // int gc_lobjmoreItems3()
283
284 inline void gc_lobjqueueinit4() {
285         gclobjtail2 = gclobjtail;
286         gclobjtailindex2 = gclobjtailindex;
287 } // void gc_lobjqueueinit2()
288
289 inline void * gc_lobjdequeue4(int * length,
290                                           int * host) {
291   if (gclobjtailindex2==NUMLOBJPTRS) {
292     gclobjtail2=gclobjtail2->next;
293     gclobjtailindex2=0;
294   } // if (gclobjtailindex==NUMLOBJPTRS)
295         if(length != NULL) {
296                 *length = gclobjtail2->lengths[gclobjtailindex2];
297         }
298         if(host != NULL) {
299                 *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
300         }
301   return gclobjtail2->lobjs[gclobjtailindex2++];
302 } // void * gc_lobjdequeue()
303
304 inline int gc_lobjmoreItems4() {
305   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
306     return 0;
307   return 1;
308 } // int gc_lobjmoreItems(
309
310 INTPTR gccurr_heapbound = 0;
311
312 inline void gettype_size(void * ptr, 
313                                      int * ttype, 
314                                                                          int * tsize) {
315         int type = ((int *)ptr)[0];
316         int size = 0;
317         if(type < NUMCLASSES) {
318                 // a normal object
319                 size = classsize[type];
320         } else {        
321                 // an array 
322                 struct ArrayObject *ao=(struct ArrayObject *)ptr;
323                 int elementsize=classsize[type];
324                 int length=ao->___length___; 
325                 size=sizeof(struct ArrayObject)+length*elementsize;
326         } // if(type < NUMCLASSES)
327         *ttype = type;
328         *tsize = size;
329 }
330
331 inline bool isLarge(void * ptr, 
332                                 int * ttype, 
333                                                                                 int * tsize) {
334 #ifdef DEBUG
335                 BAMBOO_DEBUGPRINT(0xe701);
336                 BAMBOO_DEBUGPRINT_REG(ptr);
337 #endif
338         // check if a pointer is referring to a large object
339         gettype_size(ptr, ttype, tsize);
340 #ifdef DEBUG
341         BAMBOO_DEBUGPRINT(*tsize);
342 #endif
343         int bound = (BAMBOO_SMEM_SIZE);
344         if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
345                 bound = (BAMBOO_SMEM_SIZE_L);
346         }
347         if((((int)ptr-gcbaseva)%(bound))==0) {
348                 // ptr is a start of a block
349 #ifdef DEBUG
350                 BAMBOO_DEBUGPRINT(0xe702);
351                 BAMBOO_DEBUGPRINT(1);
352 #endif
353                 return true;
354         }
355         if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
356                 // it acrosses the boundary of current block
357 #ifdef DEBUG
358                 BAMBOO_DEBUGPRINT(0xe703);
359                 BAMBOO_DEBUGPRINT(1);
360 #endif
361                 return true;
362         }
363 #ifdef DEBUG
364                 BAMBOO_DEBUGPRINT(0);
365 #endif
366         return false;
367 } // bool isLarge(void * ptr, int * ttype, int * tsize)
368
369 inline int hostcore(void * ptr) {
370         // check the host core of ptr
371         int host = 0;
372         RESIDECORE(ptr, &host);
373 #ifdef DEBUG
374         BAMBOO_DEBUGPRINT(0xedd0);
375         BAMBOO_DEBUGPRINT_REG(ptr);
376         BAMBOO_DEBUGPRINT_REG(host);
377 #endif
378         return host;
379 } // int hostcore(void * ptr)
380
381 inline bool isLocal(void * ptr) {
382         // check if a pointer is in shared heap on this core
383         return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
384 } // bool isLocal(void * ptr)
385
386 inline bool gc_checkCoreStatus() {
387         bool allStall = true;
388         for(int i = 0; i < NUMCORES4GC; ++i) {
389                 if(gccorestatus[i] != 0) {
390                         allStall = false;
391                         break;
392                 } // if(gccorestatus[i] != 0)
393         } // for(i = 0; i < NUMCORES4GC; ++i)
394         return allStall;
395 }
396
397 inline void checkMarkStatue() {
398 #ifdef DEBUG
399         BAMBOO_DEBUGPRINT(0xee01);
400 #endif
401         int i;
402         if((!waitconfirm) || 
403                         (waitconfirm && (numconfirm == 0))) {
404 #ifdef DEBUG
405                 BAMBOO_DEBUGPRINT(0xee02);
406 #endif
407                 BAMBOO_START_CRITICAL_SECTION_STATUS();  
408                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
409                 gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
410                 gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
411                 // check the status of all cores
412                 bool allStall = gc_checkCoreStatus();
413 #ifdef DEBUG
414                 BAMBOO_DEBUGPRINT(0xee03);
415 #endif
416                 if(allStall) {
417 #ifdef DEBUG
418                         BAMBOO_DEBUGPRINT(0xee04);
419 #endif
420                         // ask for confirm
421                         if(!waitconfirm) {
422 #ifdef DEBUG
423                                 BAMBOO_DEBUGPRINT(0xee05);
424 #endif
425                                 // the first time found all cores stall
426                                 // send out status confirm msg to all other cores
427                                 // reset the corestatus array too
428                                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
429                                 waitconfirm = true;
430                                 numconfirm = NUMCORES4GC - 1;
431                                 for(i = 1; i < NUMCORES4GC; ++i) {      
432                                         gccorestatus[i] = 1;
433                                         // send mark phase finish confirm request msg to core i
434                                         send_msg_1(i, GCMARKCONFIRM, false);
435                                 } // for(i = 1; i < NUMCORES4GC; ++i) 
436                         } else {
437                                 // check if the sum of send objs and receive obj are the same
438                                 // yes->check if the info is the latest; no->go on executing
439                                 int sumsendobj = 0;
440                                 for(i = 0; i < NUMCORES4GC; ++i) {
441                                         sumsendobj += gcnumsendobjs[i];
442                                 } // for(i = 0; i < NUMCORES4GC; ++i) 
443 #ifdef DEBUG
444                                 BAMBOO_DEBUGPRINT(0xee06);
445                                 BAMBOO_DEBUGPRINT_REG(sumsendobj);
446 #endif
447                                 for(i = 0; i < NUMCORES4GC; ++i) {
448                                         sumsendobj -= gcnumreceiveobjs[i];
449                                 } // for(i = 0; i < NUMCORES4GC; ++i) 
450 #ifdef DEBUG
451                                 BAMBOO_DEBUGPRINT(0xee07);
452                                 BAMBOO_DEBUGPRINT_REG(sumsendobj);
453 #endif
454                                 if(0 == sumsendobj) {
455 #ifdef DEBUG
456                                         BAMBOO_DEBUGPRINT(0xee08);
457 #endif
458                                         // all the core status info are the latest
459                                         // stop mark phase
460                                         gcphase = COMPACTPHASE;
461                                         // restore the gcstatus for all cores
462                                         for(i = 0; i < NUMCORES4GC; ++i) {
463                                                 gccorestatus[i] = 1;
464                                         } // for(i = 0; i < NUMCORES4GC; ++i)
465                                 } // if(0 == sumsendobj)
466                         } // if(!gcwaitconfirm) else()
467                 } // if(allStall)
468                 BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
469         } // if((!waitconfirm)...
470 #ifdef DEBUG
471         BAMBOO_DEBUGPRINT(0xee0a);
472 #endif
473 } // void checkMarkStatue()
474
475 inline bool preGC() {
476         // preparation for gc
477         // make sure to clear all incoming msgs espacially transfer obj msgs
478 #ifdef DEBUG
479         BAMBOO_DEBUGPRINT(0xec01);
480 #endif
481         int i;
482         if((!waitconfirm) || 
483                                                   (waitconfirm && (numconfirm == 0))) {
484                 // send out status confirm msgs to all cores to check if there are
485                 // transfer obj msgs on-the-fly
486                 waitconfirm = true;
487                 numconfirm = NUMCORESACTIVE - 1;
488                 for(i = 1; i < NUMCORESACTIVE; ++i) {   
489                         corestatus[i] = 1;
490                         // send status confirm msg to core i
491                         send_msg_1(i, STATUSCONFIRM, false);
492                 } // for(i = 1; i < NUMCORESACTIVE; ++i)
493
494 #ifdef DEBUG
495                 BAMBOO_DEBUGPRINT(0xec02);
496 #endif
497                 while(true) {
498                         if(numconfirm == 0) {
499                                 break;
500                         }
501                 } // wait for confirmations
502                 waitconfirm = false;
503                 numconfirm = 0;
504 #ifdef DEBUG
505                 BAMBOO_DEBUGPRINT(0xec03);
506 #endif
507                 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
508                 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
509                 int sumsendobj = 0;
510 #ifdef DEBUG
511                 BAMBOO_DEBUGPRINT(0xec04);
512 #endif
513                 for(i = 0; i < NUMCORESACTIVE; ++i) {
514                         sumsendobj += numsendobjs[i];
515 #ifdef DEBUG
516                         BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
517 #endif
518                 } // for(i = 1; i < NUMCORESACTIVE; ++i)
519 #ifdef DEBUG
520         BAMBOO_DEBUGPRINT(0xec05);
521         BAMBOO_DEBUGPRINT_REG(sumsendobj);
522 #endif
523                 for(i = 0; i < NUMCORESACTIVE; ++i) {
524                         sumsendobj -= numreceiveobjs[i];
525 #ifdef DEBUG
526                         BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
527 #endif
528                 } // for(i = 1; i < NUMCORESACTIVE; ++i)
529 #ifdef DEBUG
530                 BAMBOO_DEBUGPRINT(0xec06);
531                 BAMBOO_DEBUGPRINT_REG(sumsendobj);
532 #endif
533                 if(0 == sumsendobj) {
534                         return true;
535                 } else {
536                         // still have some transfer obj msgs on-the-fly, can not start gc
537                         return false;
538                 } // if(0 == sumsendobj) 
539         } else {
540 #ifdef DEBUG
541                 BAMBOO_DEBUGPRINT(0xec07);
542 #endif
543                 // previously asked for status confirmation and do not have all the 
544                 // confirmations yet, can not start gc
545                 return false;
546         } // if((!waitconfirm) || 
547 } // bool preGC()
548
549 inline void initGC() {
550         int i;
551         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
552                 for(i = 0; i < NUMCORES4GC; ++i) {
553                         gccorestatus[i] = 1;
554                         gcnumsendobjs[i] = 0; 
555                         gcnumreceiveobjs[i] = 0;
556                         gcloads[i] = 0;
557                         gcrequiredmems[i] = 0;
558                         gcfilledblocks[i] = 0;
559                         gcstopblock[i] = 0;
560                 } // for(i = 0; i < NUMCORES4GC; ++i)
561                 gcheaptop = 0;
562                 gctopcore = 0;
563                 gctopblock = 0;
564         } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) 
565         gcself_numsendobjs = 0;
566         gcself_numreceiveobjs = 0;
567         gcmarkedptrbound = 0;
568         gcobj2map = 0;
569         gcmappedobj = 0;
570         gcismapped = false;
571         gcnumlobjs = 0;
572         gcmovestartaddr = 0;
573         gctomove = false;
574         gcblock2fill = 0;
575         gcmovepending = 0;
576         gccurr_heaptop = 0;
577         gcdstcore = 0;
578
579         // initialize queue
580         if (gchead==NULL) {
581                 gcheadindex=gctailindex=gctailindex2 = 0;
582                 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
583         } else {
584                 gctailindex = gctailindex2 = gcheadindex;
585                 gctail = gctail2 = gchead;
586         }
587
588         // initialize the large obj queues
589         if (gclobjhead==NULL) {
590                 gclobjheadindex=0;
591                 gclobjtailindex=0;
592                 gclobjtailindex2 = 0;
593                 gclobjhead=gclobjtail=gclobjtail2=
594                         RUNMALLOC(sizeof(struct lobjpointerblock));
595         } else {
596                 gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
597                 gclobjtail = gclobjtail2 = gclobjhead;
598         }
599         gclobjhead->next = gclobjhead->prev = NULL;
600
601         freeRuntimeHash(gcpointertbl);
602         gcpointertbl = allocateRuntimeHash(20);
603         //freeMGCHash(gcpointertbl);
604         //gcpointertbl = allocateMGCHash(20);
605         //mgchashreset();
606         
607         freeMGCHash(gcforwardobjtbl);
608         gcforwardobjtbl = allocateMGCHash(20, 3);
609
610         memset(gcsmemtbl, '\0', sizeof(int)*gcnumblock);
611 } // void initGC()
612
613 // compute load balance for all cores
614 inline int loadbalance(int * heaptop) {
615         // compute load balance
616         int i;
617
618         // get the total loads
619         int tloads = gcloads[STARTUPCORE];
620         for(i = 1; i < NUMCORES4GC; i++) {
621                 tloads += gcloads[i];
622         }
623         *heaptop = gcbaseva + tloads;
624 #ifdef DEBUG
625         BAMBOO_DEBUGPRINT(0xdddd);
626         BAMBOO_DEBUGPRINT_REG(tloads);
627         BAMBOO_DEBUGPRINT_REG(*heaptop);
628 #endif
629         int b = 0;
630         BLOCKINDEX(*heaptop, &b);
631         int numbpc = b / NUMCORES4GC; // num of blocks per core
632 #ifdef DEBUG
633         BAMBOO_DEBUGPRINT_REG(b);
634         BAMBOO_DEBUGPRINT_REG(numbpc);
635 #endif
636         gctopblock = b;
637         RESIDECORE(heaptop, &gctopcore);
638 #ifdef DEBUG
639         BAMBOO_DEBUGPRINT_REG(gctopcore);
640 #endif
641         return numbpc;
642 } // void loadbalance(int * heaptop)
643
644 inline bool cacheLObjs() {
645         // check the total mem size need for large objs
646         int sumsize = 0;
647         int size = 0;
648 #ifdef DEBUG
649         BAMBOO_DEBUGPRINT(0xe801);
650 #endif
651         gclobjtail2 = gclobjtail;
652         gclobjtailindex2 = gclobjtailindex;
653         int tmp_lobj = 0;
654         int tmp_len = 0;
655         int tmp_host = 0;
656         // compute total mem size required and sort the lobjs in ascending order
657         while(gc_lobjmoreItems2()){
658                 gc_lobjdequeue2();
659                 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
660                 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
661                 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
662                 sumsize += tmp_len;
663 #ifdef DEBUG
664                 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
665                 BAMBOO_DEBUGPRINT_REG(tmp_len);
666                 BAMBOO_DEBUGPRINT_REG(sumsize);
667 #endif
668                 int i = gclobjtailindex2-1;
669                 struct lobjpointerblock * tmp_block = gclobjtail2;
670                 // find the place to insert
671                 while(true) {
672                         if(i == 0) {
673                                 if(tmp_block->prev == NULL) {
674                                         break;
675                                 }
676                                 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
677                                         tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
678                                         tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
679                                         tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
680                                         tmp_block = tmp_block->prev;
681                                         i = NUMLOBJPTRS-1;
682                                 } else {
683                                         break;
684                                 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
685                         } else {
686                                 if(tmp_block->lobjs[i-1] > tmp_lobj) {
687                                         tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
688                                         tmp_block->lengths[i] = tmp_block->lengths[i-1];
689                                         tmp_block->hosts[i] = tmp_block->hosts[i-1];
690                                         i--;
691                                 } else {
692                                         break;
693                                 } // if(tmp_block->lobjs[i-1] < tmp_lobj)
694                         } // if(i ==0 ) else {}
695                 } // while(true)
696                 // insert it
697                 if(i != gclobjtailindex2 - 1) {
698                         tmp_block->lobjs[i] = tmp_lobj;
699                         tmp_block->lengths[i] = tmp_len;
700                         tmp_block->hosts[i] = tmp_host;
701                 }
702         } // while(gc_lobjmoreItems2())
703
704         // check if there are enough space to cache these large objs
705         INTPTR dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
706         if(gcheaptop > dst) {
707                 // do not have enough room to cache large objs
708 #ifdef DEBUG
709         BAMBOO_DEBUGPRINT(0xe802);
710         BAMBOO_DEBUGPRINT_REG(dst);
711         BAMBOO_DEBUGPRINT_REG(gcheaptop);
712 #endif
713                 return false;
714         }
715 #ifdef DEBUG
716         BAMBOO_DEBUGPRINT(0xe803);
717         BAMBOO_DEBUGPRINT_REG(dst);
718         BAMBOO_DEBUGPRINT_REG(gcheaptop);
719 #endif
720
721         gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
722         // cache the largeObjs to the top of the shared heap
723         //gclobjtail2 = gclobjtail;
724         //gclobjtailindex2 = gclobjtailindex;
725         dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
726         while(gc_lobjmoreItems3()) {
727                 gc_lobjdequeue3();
728                 size = gclobjtail2->lengths[gclobjtailindex2];
729                 // set the mark field to , indicating that this obj has been moved 
730                 // and need to be flushed
731                 ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
732                 dst -= size;
733                 if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
734                         memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
735                 } else {
736                   memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
737                 }
738 #ifdef DEBUG
739                 BAMBOO_DEBUGPRINT(0x804);
740                 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
741                 BAMBOO_DEBUGPRINT(dst);
742                 BAMBOO_DEBUGPRINT_REG(size);
743                 BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
744                 BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
745 #endif
746         }
747         return true;
748 } // void cacheLObjs()
749
750 // NOTE: the free mem chunks should be maintained in an ordered linklist
751 // the listtop param always specify current list tail
752
753 // update the gcsmemtbl to record current shared mem usage
754 void updateSmemTbl(int coren,
755                                int localtop) {
756         int ltopcore = 0;
757         int bound = BAMBOO_SMEM_SIZE_L;
758         BLOCKINDEX(localtop, &ltopcore);
759         if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
760                 bound = BAMBOO_SMEM_SIZE;
761         }
762         int load = (localtop-gcbaseva)%bound;
763         int i = 0;
764         int j = 0;
765         int toset = 0;
766         do{
767                 toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
768                 if(toset < ltopcore) {
769                         gcsmemtbl[toset]=
770                                 (toset<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
771                 } else if(toset == ltopcore) {
772                         gcsmemtbl[toset] = load;
773                         break;
774                 } else {
775                         break;
776                 }
777                 i++;
778                 if(i == 2) {
779                         i = 0;
780                         j++;
781                 }
782         }while(true);
783 } // void updateSmemTbl(int, int)
784
785 inline struct freeMemItem * addFreeMemItem(int ptr,
786                                                        int size,
787                                                                                                                                                                          struct freeMemItem * listtail,
788                                                                                                                                                                          bool* sethead) {
789         struct freeMemItem * tochange = listtail;
790         if(*sethead) {
791                 if(tochange->next == NULL) {
792                         if(bamboo_free_mem_list->backuplist != NULL) {
793                                 tochange->next = bamboo_free_mem_list->backuplist;
794                                 bamboo_free_mem_list->backuplist = NULL;
795                         } else {
796                                 tochange->next = 
797                                         (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
798                         }
799                 } // if(tochange->next == NULL)
800                 tochange = tochange->next;
801         } else {
802                 *sethead = true;
803         } // if(sethead)
804         tochange->ptr = ptr;
805         tochange->size = size;
806         BLOCKINDEX(ptr, &(tochange->startblock));
807         BLOCKINDEX(ptr+size-1, &(tochange->endblock));
808         // zero out all these spare memory
809         // note that, leave the mem starting from heaptop, as it caches large objs
810         // zero out these cache later when moving large obj
811         {
812                 INTPTR tmp = tochange->ptr;
813                 unsigned long long int size = tochange->size;
814                 while(size > 0) {
815                         int tsize = size>1024*1024*1024?1024*1024*1024:size;
816                         memset(tmp, '\0', tsize);
817                         size -= tsize;
818                         tmp += tsize;
819                 }
820         }
821         return tochange;
822 } // struct freeMemItem * addFreeMemItem(int,int,struct freeMemItem*,bool*, int)
823
824 inline void moveLObjs() {
825 #ifdef DEBUG
826         BAMBOO_DEBUGPRINT(0xea01);
827 #endif
828         // find current heap top
829         // flush all gcloads to indicate the real heap top on one core
830         // previous it represents the next available ptr on a core
831         if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L))) 
832                         && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
833                 // edge of a block, check if this is exactly the heaptop
834                 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
835                 gcloads[0]+=(gcfilledblocks[0]>1?
836                                 (BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
837         } 
838         updateSmemTbl(0, gcloads[0]);
839 #ifdef DEBUG
840   BAMBOO_DEBUGPRINT(0xea02);
841         BAMBOO_DEBUGPRINT_REG(gcloads[0]);
842         BAMBOO_DEBUGPRINT_REG(gcsmemtbl[0]);
843 #endif
844         for(int i = 1; i < NUMCORES4GC; i++) {
845                 int tmptop = 0;
846 #ifdef DEBUG
847                 BAMBOO_DEBUGPRINT(0xf000+i);
848                 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
849                 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
850 #endif
851                 if((gcfilledblocks[i] > 0) 
852                                 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
853                         // edge of a block, check if this is exactly the heaptop
854                         BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
855                         gcloads[i]
856                                 +=(gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
857                         tmptop = gcloads[i];
858                 } 
859                 updateSmemTbl(i, gcloads[i]);
860 #ifdef DEBUG
861                 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
862 #endif
863         } // for(int i = 1; i < NUMCORES4GC; i++) {
864
865         // find current heap top
866         // TODO
867         // a bug here: when using local allocation, directly move large objects
868         // to the highest free chunk might not be memory efficient
869         int tmpheaptop = 0;
870         int size = 0;
871         int bound = 0;
872         int i = 0;
873         for(i = gcnumblock-1; i >= 0; i--) {
874                 if(gcsmemtbl[i] > 0) {
875                         break;
876                 }
877         }
878         if(i == -1) {
879                 tmpheaptop = gcbaseva;
880         } else {
881                 tmpheaptop = gcbaseva+gcsmemtbl[i]+((i<NUMCORES4GC)?
882                                 (BAMBOO_SMEM_SIZE_L*i):
883                                 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
884         }
885
886         // move large objs from gcheaptop to tmpheaptop
887         // write the header first
888         int tomove = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
889 #ifdef DEBUG
890         BAMBOO_DEBUGPRINT(0xea03);
891         BAMBOO_DEBUGPRINT_REG(tomove);
892         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
893         BAMBOO_DEBUGPRINT_REG(gcheaptop);
894 #endif
895         // flush the sbstartbl
896         memset(&(gcsbstarttbl[gcreservedsb]), '\0', 
897                 (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
898         if(tomove == 0) {
899                 gcheaptop = tmpheaptop;
900         } else {
901                 // check how many blocks it acrosses
902                 int remain = tmpheaptop-gcbaseva;
903                 int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; //number of the sblock
904                 int b = 0; // number of the block
905                 BLOCKINDEX(tmpheaptop, &b);
906                 // check the remaining space in this block
907                 bound = (BAMBOO_SMEM_SIZE);
908                 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
909                         bound = (BAMBOO_SMEM_SIZE_L);
910                 }
911                 remain = bound - remain%bound;
912
913 #ifdef DEBUG
914                 BAMBOO_DEBUGPRINT(0xea04);
915 #endif
916                 size = 0;
917                 int isize = 0;
918                 int host = 0;
919                 int ptr = 0;
920                 int base = tmpheaptop;
921                 int cpysize = 0;
922                 remain -= BAMBOO_CACHE_LINE_SIZE;
923                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
924                 gc_lobjqueueinit4();
925                 while(gc_lobjmoreItems4()) {
926                         ptr = (int)(gc_lobjdequeue4(&size, &host));
927                         ALIGNSIZE(size, &isize);
928                         if(remain < isize) {
929                                 // this object acrosses blocks
930                                 if(cpysize > 0) {
931                                         // close current block, fill its header
932                                         memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
933                                         *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
934                                         gcsmemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; // add the size of the header
935                                         cpysize = 0;
936                                         base = tmpheaptop;
937                                         if(remain == 0) {
938                                                 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
939                                                                                  BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
940                                         } 
941                                         remain -= BAMBOO_CACHE_LINE_SIZE;
942                                         tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
943                                         BLOCKINDEX(tmpheaptop, &b);
944                                         sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
945                                 } // if(cpysize > 0)
946
947                                 // move the large obj
948                                 if((int)gcheaptop < (int)(tmpheaptop)+size) {
949                                   memmove(tmpheaptop, gcheaptop, size);
950                                 } else {
951                                         memcpy(tmpheaptop, gcheaptop, size);
952                                 }
953                                 // fill the remaining space with -2 padding
954                                 memset(tmpheaptop+size, -2, isize-size);
955                                 // zero out original mem caching the lobj
956                                 memset(gcheaptop, '\0', size);
957 #ifdef DEBUG
958                                 BAMBOO_DEBUGPRINT(0xea05);
959                                 BAMBOO_DEBUGPRINT_REG(gcheaptop);
960                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
961                                 BAMBOO_DEBUGPRINT_REG(size);
962                                 BAMBOO_DEBUGPRINT_REG(isize);
963                                 BAMBOO_DEBUGPRINT_REG(base);
964 #endif
965                                 gcheaptop += size;
966                                 if(host == BAMBOO_NUM_OF_CORE) {
967                                         //if(ptr != tmpheaptop) {
968                                         BAMBOO_START_CRITICAL_SECTION();
969                                         //mgchashInsert_I(ptr, tmpheaptop);
970                                         RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
971                                         //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
972                                         BAMBOO_CLOSE_CRITICAL_SECTION();
973                                         //}
974 #ifdef DEBUG
975                                         BAMBOO_DEBUGPRINT(0xcdca);
976                                         BAMBOO_DEBUGPRINT_REG(ptr);
977                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
978 #endif
979                                 } else {
980                                         // send the original host core with the mapping info
981                                         send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
982 #ifdef DEBUG
983                                         BAMBOO_DEBUGPRINT(0xcdcb);
984                                         BAMBOO_DEBUGPRINT_REG(ptr);
985                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
986 #endif
987                                 } // if(host == BAMBOO_NUM_OF_CORE) else ...
988                                 tmpheaptop += isize;
989
990                                 // set the gcsbstarttbl and gcsmemtbl
991                                 int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
992                                 for(int k = 1; k < tmpsbs; k++) {
993                                         gcsbstarttbl[sb+k] = (INTPTR)(-1);
994                                 }
995                                 sb += tmpsbs;
996                                 bound = (b<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
997                                 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
998                                 for(; b < tmpsbs; b++) {
999                                         gcsmemtbl[b] = bound;
1000                                         if(b==NUMCORES4GC-1) {
1001                                                 bound = BAMBOO_SMEM_SIZE;
1002                                         }
1003                                 }
1004                                 if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
1005                                         gcsbstarttbl[sb] = (INTPTR)(-1);
1006                                         remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
1007                                                                          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1008                                         gcsmemtbl[b] = bound;
1009                                 } else {
1010                                         gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
1011                                         remain = tmpheaptop-gcbaseva;
1012                                         gcsmemtbl[b] = remain%bound;
1013                                         remain = bound - gcsmemtbl[b];
1014                                 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
1015
1016                                 // close current block and fill the header
1017                                 memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1018                                 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
1019                                 cpysize = 0;
1020                                 base = tmpheaptop;
1021                                 if(remain == BAMBOO_CACHE_LINE_SIZE) {
1022                                         // fill with 0 in case
1023                                         memset(tmpheaptop, '\0', remain);
1024                                 }
1025                                 remain -= BAMBOO_CACHE_LINE_SIZE;
1026                                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1027                         } else {
1028                                 remain -= isize;
1029                                 // move the large obj
1030                                 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1031                                 memmove(tmpheaptop, gcheaptop, size);
1032                                 } else {
1033                                         memcpy(tmpheaptop, gcheaptop, size);
1034                                 }
1035                                 // fill the remaining space with -2 padding
1036                                 memset(tmpheaptop+size, -2, isize-size);
1037                                 // zero out original mem caching the lobj
1038                                 memset(gcheaptop, '\0', size);
1039 #ifdef DEBUG
1040                                 BAMBOO_DEBUGPRINT(0xea06);
1041                                 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1042                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1043                                 BAMBOO_DEBUGPRINT_REG(size);
1044                                 BAMBOO_DEBUGPRINT_REG(isize);
1045 #endif
1046
1047                                 gcheaptop += size;
1048                                 cpysize += isize;
1049                                 if(host == BAMBOO_NUM_OF_CORE) {
1050                                         //if(ptr != tmpheaptop) {
1051                                         BAMBOO_START_CRITICAL_SECTION();
1052                                         //mgchashInsert_I(ptr, tmpheaptop);
1053                                         RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1054                                         //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1055                                         BAMBOO_CLOSE_CRITICAL_SECTION();
1056                                         //}
1057 #ifdef DEBUG
1058                                         BAMBOO_DEBUGPRINT(0xcdcc);
1059                                         BAMBOO_DEBUGPRINT_REG(ptr);
1060                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1061                                         BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
1062 #endif
1063                                 } else {
1064                                         // send the original host core with the mapping info
1065                                         send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1066 #ifdef DEBUG
1067                                         BAMBOO_DEBUGPRINT(0xcdcd);
1068                                         BAMBOO_DEBUGPRINT_REG(ptr);
1069                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1070 #endif
1071                                 } // if(host == BAMBOO_NUM_OF_CORE) else ...
1072                                 tmpheaptop += isize;
1073
1074                                 // update gcsmemtbl
1075                                 gcsmemtbl[b] += isize;
1076                         } // if(remain < isize) else ...
1077                 } // while(gc_lobjmoreItems())
1078                 if(cpysize > 0) {
1079                         // close current block, fill the header
1080                         memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1081                         *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1082                         gcsmemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
1083                 } else {
1084                         tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
1085                 }
1086                 gcheaptop = tmpheaptop;
1087
1088         } // if(tomove == 0)
1089
1090 #ifdef DEBUG
1091         BAMBOO_DEBUGPRINT(0xea07);
1092         BAMBOO_DEBUGPRINT_REG(gcheaptop);
1093 #endif
1094
1095         // update the free mem list
1096         // create new free mem list according to gcsmemtbl
1097         bool sethead = false;
1098         if(bamboo_free_mem_list->head == NULL) {
1099                 bamboo_free_mem_list->head = bamboo_free_mem_list->backuplist;
1100                 bamboo_free_mem_list->backuplist = NULL;
1101         }
1102         struct freeMemItem * tochange = bamboo_free_mem_list->head;
1103         if(tochange == NULL) {
1104                 bamboo_free_mem_list->head = tochange = 
1105                         (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
1106                 tochange->next = NULL;
1107         }
1108         int startptr = 0;
1109         size = 0;
1110         bound = BAMBOO_SMEM_SIZE_L;
1111         for(i = 0; i < gcnumblock-bamboo_reserved_smem; i++) {
1112                 if(gcsmemtbl[i] < bound) {
1113                         if(gcsmemtbl[i] == 0) {
1114                                 // blank one
1115                                 if(startptr == 0) {
1116                                         // a start of a new free mem chunk
1117                                         startptr = gcbaseva+((i<NUMCORES4GC)?(i*BAMBOO_SMEM_SIZE_L)
1118                                                         :(BAMBOO_LARGE_SMEM_BOUND+(i-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
1119                                 } // if(startptr == 0) 
1120                                 size += bound;
1121                         } else {
1122                                 if(startptr != 0) {
1123                                         // the end of previous free mem chunk
1124                                         tochange = addFreeMemItem(startptr,size,tochange,&sethead);
1125                                         //startptr = 0;
1126                                         //size = 0;
1127                                 }
1128                                 // start of a new free mem chunk
1129                                 startptr = gcbaseva+((i<NUMCORES4GC)?(i*BAMBOO_SMEM_SIZE_L)
1130                                       :((BAMBOO_LARGE_SMEM_BOUND+(i-NUMCORES4GC)*BAMBOO_SMEM_SIZE)))
1131                                                          +gcsmemtbl[i];
1132                                 size = bound-gcsmemtbl[i];
1133                         } // if(gcsmemtbl[i] == 0) else
1134                 } else {
1135                         if(startptr != 0) {
1136                                 // the end of previous free mem chunk
1137                                 tochange = addFreeMemItem(startptr,size,tochange,&sethead);
1138                                 startptr = 0;
1139                                 size = 0;
1140                         } // if(startptr != 0) {
1141                 } // if(gcsmemtbl[i] < bound) else
1142                 if(i == NUMCORES4GC-1) {
1143                         bound = BAMBOO_SMEM_SIZE;
1144                 }
1145         } // for(i = 0; i < gcnumblock; i++) {
1146         if(startptr != 0) {
1147                 tochange = addFreeMemItem(startptr, size, tochange, &sethead);
1148                 startptr = 0;
1149                 size = 0;
1150         }
1151         // remove the remaing list to the back up list, only remain one node, 
1152         // free the others
1153         if(tochange->next != NULL) {
1154                 struct freeMemItem * blist = NULL;
1155                 if(bamboo_free_mem_list->backuplist != NULL) {
1156                         blist = tochange->next;
1157                 } else {
1158                         bamboo_free_mem_list->backuplist = tochange->next;
1159                         blist = bamboo_free_mem_list->backuplist->next;
1160                         bamboo_free_mem_list->backuplist->next = NULL;
1161                 }
1162                 tochange->next = NULL;
1163                 while(blist != NULL) {
1164                         struct freeMemItem * tmp = blist;
1165                         blist = blist->next;
1166                         RUNFREE(tmp);
1167                 } // if(blist != NULL)
1168         }
1169
1170 #ifdef DEBUG
1171         BAMBOO_DEBUGPRINT(0xea08);
1172         BAMBOO_DEBUGPRINT_REG(gcheaptop);
1173 #endif
1174 } // void moveLObjs()
1175
1176 inline void markObj(void * objptr) {
1177         if(objptr == NULL) {
1178                 return;
1179         }
1180         if(ISSHAREDOBJ(objptr)) {
1181                 int host = hostcore(objptr);
1182                 if(BAMBOO_NUM_OF_CORE == host) {
1183                         // on this core
1184                         if(((int *)objptr)[6] == INIT) {
1185                                 // this is the first time that this object is discovered,
1186                                 // set the flag as DISCOVERED
1187                                 ((int *)objptr)[6] = DISCOVERED;
1188                                 BAMBOO_START_CRITICAL_SECTION();
1189                                 gc_enqueue_I(objptr);  
1190                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1191                         }
1192                 } else {
1193 #ifdef DEBUG
1194                         BAMBOO_DEBUGPRINT(0xbbbb);
1195                         BAMBOO_DEBUGPRINT_REG(host);
1196                         BAMBOO_DEBUGPRINT_REG(objptr);
1197 #endif
1198                         // check if this obj has been forwarded
1199                         if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
1200                                 // send a msg to host informing that objptr is active
1201                                 send_msg_2(host, GCMARKEDOBJ, objptr, false);
1202                                 gcself_numsendobjs++;
1203                                 MGCHashadd(gcforwardobjtbl, (int)objptr);
1204                         }
1205                 }
1206         } else {
1207                 BAMBOO_START_CRITICAL_SECTION();
1208                 gc_enqueue_I(objptr);
1209                 BAMBOO_CLOSE_CRITICAL_SECTION();
1210         } // if(ISSHAREDOBJ(objptr))
1211 } // void markObj(void * objptr) 
1212
1213 // enqueue root objs
1214 inline void tomark(struct garbagelist * stackptr) {
1215         if(MARKPHASE != gcphase) {
1216 #ifdef DEBUG
1217                 BAMBOO_DEBUGPRINT_REG(gcphase);
1218 #endif
1219                 BAMBOO_EXIT(0xb101);
1220         }
1221         gcbusystatus = true;
1222         gcnumlobjs = 0;
1223         
1224         int i,j;
1225         // enqueue current stack 
1226         while(stackptr!=NULL) {
1227 #ifdef DEBUG
1228                 BAMBOO_DEBUGPRINT(0xe501);
1229                 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1230                 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1231                 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1232 #endif
1233                 for(i=0; i<stackptr->size; i++) {
1234                         if(stackptr->array[i] != NULL) {
1235                                 //BAMBOO_START_CRITICAL_SECTION();
1236                                 //gc_enqueue_I(stackptr->array[i]);
1237                                 //BAMBOO_CLOSE_CRITICAL_SECTION();
1238                           markObj(stackptr->array[i]);
1239                         }
1240                 }
1241                 stackptr=stackptr->next;
1242         }
1243
1244 #ifdef DEBUG
1245         BAMBOO_DEBUGPRINT(0xe503);
1246 #endif
1247         // enqueue objectsets
1248         if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
1249                 for(i=0; i<NUMCLASSES; i++) {
1250                         struct parameterwrapper ** queues = 
1251                                 objectqueues[BAMBOO_NUM_OF_CORE][i];
1252                         int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1253                         for(j = 0; j < length; ++j) {
1254                                 struct parameterwrapper * parameter = queues[j];
1255                                 struct ObjectHash * set=parameter->objectset;
1256                                 struct ObjectNode * ptr=set->listhead;
1257                                 while(ptr!=NULL) {
1258                                         //BAMBOO_START_CRITICAL_SECTION();
1259                                         //gc_enqueue_I((void *)ptr->key);
1260                                         //BAMBOO_CLOSE_CRITICAL_SECTION();
1261                                         markObj((void *)ptr->key);
1262                                         ptr=ptr->lnext;
1263                                 }
1264                         }
1265                 }
1266         }
1267
1268         // euqueue current task descriptor
1269         if(currtpd != NULL) {
1270 #ifdef DEBUG
1271                 BAMBOO_DEBUGPRINT(0xe504);
1272 #endif
1273                 for(i=0; i<currtpd->numParameters; i++) {
1274                         //BAMBOO_START_CRITICAL_SECTION();
1275                         //gc_enqueue_I(currtpd->parameterArray[i]);
1276                         //BAMBOO_CLOSE_CRITICAL_SECTION();
1277                         markObj(currtpd->parameterArray[i]);
1278                 }
1279         }
1280
1281 #ifdef DEBUG
1282         BAMBOO_DEBUGPRINT(0xe505);
1283 #endif
1284         // euqueue active tasks
1285         if(activetasks != NULL) {
1286                 struct genpointerlist * ptr=activetasks->list;
1287                 while(ptr!=NULL) {
1288                         struct taskparamdescriptor *tpd=ptr->src;
1289                         int i;
1290                         for(i=0; i<tpd->numParameters; i++) {
1291                                 //BAMBOO_START_CRITICAL_SECTION();
1292                                 //gc_enqueue_I(tpd->parameterArray[i]);
1293                                 //BAMBOO_CLOSE_CRITICAL_SECTION();
1294                                 markObj(tpd->parameterArray[i]);
1295                         }
1296                         ptr=ptr->inext;
1297                 }
1298         }
1299
1300 #ifdef DEBUG
1301         BAMBOO_DEBUGPRINT(0xe506);
1302 #endif
1303         // enqueue cached transferred obj
1304         struct QueueItem * tmpobjptr =  getHead(&objqueue);
1305         while(tmpobjptr != NULL) {
1306                 struct transObjInfo * objInfo = 
1307                         (struct transObjInfo *)(tmpobjptr->objectptr); 
1308                 //BAMBOO_START_CRITICAL_SECTION();
1309                 //gc_enqueue_I(objInfo->objptr);
1310                 //BAMBOO_CLOSE_CRITICAL_SECTION();
1311                 markObj(objInfo->objptr);
1312                 tmpobjptr = getNextQueueItem(tmpobjptr);
1313         }
1314
1315 #ifdef DEBUG
1316         BAMBOO_DEBUGPRINT(0xe507);
1317 #endif
1318         // enqueue cached objs to be transferred
1319         struct QueueItem * item = getHead(totransobjqueue);
1320         while(item != NULL) {
1321                 struct transObjInfo * totransobj = 
1322                         (struct transObjInfo *)(item->objectptr);
1323                 //BAMBOO_START_CRITICAL_SECTION();
1324                 //gc_enqueue_I(totransobj->objptr);
1325                 //BAMBOO_CLOSE_CRITICAL_SECTION();
1326                 markObj(totransobj->objptr);
1327                 item = getNextQueueItem(item);
1328         } // while(item != NULL)
1329
1330 #ifdef DEBUG
1331         BAMBOO_DEBUGPRINT(0xe508);
1332 #endif
1333         // enqueue lock related info
1334         for(i = 0; i < runtime_locklen; ++i) {
1335          //gc_enqueue_I((void *)(runtime_locks[i].redirectlock));
1336          markObj((void *)(runtime_locks[i].redirectlock));
1337          if(runtime_locks[i].value != NULL) {
1338                  //gc_enqueue_I((void *)(runtime_locks[i].value));
1339                  markObj((void *)(runtime_locks[i].value));
1340          }
1341         }
1342
1343 } // void tomark(struct garbagelist * stackptr)
1344
1345 inline void mark(bool isfirst, 
1346                              struct garbagelist * stackptr) {
1347 #ifdef DEBUG
1348         BAMBOO_DEBUGPRINT(0xed01);
1349 #endif
1350         if(isfirst) {
1351 #ifdef DEBUG
1352                 BAMBOO_DEBUGPRINT(0xed02);
1353 #endif
1354                 // enqueue root objs
1355                 tomark(stackptr);
1356                 gccurr_heaptop = 0; // record the size of all active objs in this core
1357                                   // aligned but does not consider block boundaries
1358                 gcmarkedptrbound = 0;
1359         }
1360 #ifdef DEBUG
1361         BAMBOO_DEBUGPRINT(0xed03);
1362 #endif
1363         int isize = 0;
1364         bool checkfield = true;
1365         bool sendStall = false;
1366         // mark phase
1367         while(MARKPHASE == gcphase) {
1368 #ifdef DEBUG 
1369                 BAMBOO_DEBUGPRINT(0xed04);
1370 #endif
1371                 while(gc_moreItems2()) {
1372 #ifdef DEBUG 
1373                         BAMBOO_DEBUGPRINT(0xed05);
1374 #endif
1375                         sendStall = false;
1376                         gcbusystatus = true;
1377                         checkfield = true;
1378                         void * ptr = gc_dequeue2();
1379
1380 #ifdef DEBUG 
1381                         BAMBOO_DEBUGPRINT_REG(ptr);
1382 #endif
1383                         int size = 0;
1384                         int isize = 0;
1385                         int type = 0;
1386                         // check if it is a shared obj
1387                         if(ISSHAREDOBJ(ptr)) {
1388                                 // a shared obj, check if it is a local obj on this core
1389                                 int host = hostcore(ptr);
1390                                 bool islocal = (host == BAMBOO_NUM_OF_CORE);
1391                                 if(islocal) {
1392                                         bool isnotmarked = (((int *)ptr)[6] == DISCOVERED);
1393                                         if(isLarge(ptr, &type, &size) && isnotmarked) {
1394                                                 // ptr is a large object and not marked or enqueued
1395 #ifdef DEBUG
1396                                                 BAMBOO_DEBUGPRINT(0xecec);
1397                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1398                                                 BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
1399 #endif
1400                                                 BAMBOO_START_CRITICAL_SECTION();
1401                                                 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1402                                                 gcnumlobjs++;
1403                                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1404                                                 // mark this obj
1405                                                 ((int *)ptr)[6] = MARKED;
1406                                         } else if(isnotmarked) {
1407                                                 // ptr is an unmarked active object on this core
1408                                                 ALIGNSIZE(size, &isize);
1409                                                 gccurr_heaptop += isize;
1410 #ifdef DEBUG
1411                                                 BAMBOO_DEBUGPRINT(0xaaaa);
1412                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1413                                                 BAMBOO_DEBUGPRINT_REG(isize);
1414                                                 BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1415 #endif
1416                                                 // mark this obj
1417                                                 ((int *)ptr)[6] = MARKED;
1418
1419                                                 if(ptr + size > gcmarkedptrbound) {
1420                                                         gcmarkedptrbound = ptr + size;
1421                                                 } // if(ptr + size > gcmarkedptrbound)
1422                                         } else {
1423                                                 // ptr is not an active obj or has been marked
1424                                                 checkfield = false;
1425                                         }// if(isLarge(ptr, &type, &size)) else ...
1426                                 } else {
1427 #ifdef DEBUG
1428                                         BAMBOO_DEBUGPRINT(0xbbbb);
1429                                         BAMBOO_DEBUGPRINT_REG(host);
1430                                         BAMBOO_DEBUGPRINT_REG(ptr);
1431 #endif
1432                                         // check if this obj has been forwarded
1433                                         if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
1434                                                 // send a msg to host informing that ptr is active
1435                                                 send_msg_2(host, GCMARKEDOBJ, ptr, false);
1436                                                 gcself_numsendobjs++;
1437                                                 MGCHashadd(gcforwardobjtbl, (int)ptr);
1438                                         }
1439                                         checkfield = false;
1440                                 }// if(isLocal(ptr)) else ...
1441                         } // if(ISSHAREDOBJ(ptr))
1442 #ifdef DEBUG 
1443                         BAMBOO_DEBUGPRINT(0xed06);
1444 #endif
1445
1446                         if(checkfield) {
1447                                 // scan all pointers in ptr
1448                                 unsigned INTPTR * pointer;
1449                                 pointer=pointerarray[type];
1450                                 if (pointer==0) {
1451                                         /* Array of primitives */
1452                                         /* Do nothing */
1453                                 } else if (((INTPTR)pointer)==1) {
1454                                         /* Array of pointers */
1455                                         struct ArrayObject *ao=(struct ArrayObject *) ptr;
1456                                         int length=ao->___length___;
1457                                         int j;
1458                                         for(j=0; j<length; j++) {
1459                                                 void *objptr = 
1460                                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1461                                                 markObj(objptr);
1462                                         }
1463                                 } else {
1464                                         INTPTR size=pointer[0];
1465                                         int i;
1466                                         for(i=1; i<=size; i++) {
1467                                                 unsigned int offset=pointer[i];
1468                                                 void * objptr=*((void **)(((char *)ptr)+offset));
1469                                                 markObj(objptr);
1470                                         }
1471                                 } // if (pointer==0) else if ... else ...
1472                         } // if(checkfield)
1473                 } // while(gc_moreItems2())
1474 #ifdef DEBUG
1475                 BAMBOO_DEBUGPRINT(0xed07);
1476 #endif
1477                 gcbusystatus = false;
1478                 // send mark finish msg to core coordinator
1479                 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1480 #ifdef DEBUG
1481                         BAMBOO_DEBUGPRINT(0xed08);
1482 #endif
1483                         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1484                         gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
1485                         gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
1486                         gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1487                 } else {
1488                         if(!sendStall) {
1489 #ifdef DEBUG
1490                                 BAMBOO_DEBUGPRINT(0xed09);
1491 #endif
1492                                 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1493                                                                          gcself_numsendobjs, gcself_numreceiveobjs, false);
1494                                 sendStall = true;
1495                         }
1496                 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1497 #ifdef DEBUG 
1498                 BAMBOO_DEBUGPRINT(0xed0a);
1499 #endif
1500
1501                 if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1502 #ifdef DEBUG
1503                         BAMBOO_DEBUGPRINT(0xed0b);
1504 #endif
1505                         return;
1506                 }
1507         } // while(MARKPHASE == gcphase)
1508 } // mark()
1509
1510 inline void compact2Heaptophelper(int coren,
1511                                               int* p,
1512                                                                                                                                         int* numblocks,
1513                                                                                                                                         int* remain) {
1514         int b;
1515         int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1516         if(STARTUPCORE == coren) {
1517                 gctomove = true;
1518                 gcmovestartaddr = *p;
1519                 gcdstcore = gctopcore;
1520                 gcblock2fill = *numblocks + 1;
1521         } else {
1522                 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
1523         }
1524 #ifdef DEBUG
1525         BAMBOO_DEBUGPRINT_REG(coren);
1526         BAMBOO_DEBUGPRINT_REG(gctopcore);
1527         BAMBOO_DEBUGPRINT_REG(*p);
1528         BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1529 #endif
1530         if(memneed < *remain) {
1531 #ifdef DEBUG
1532                 BAMBOO_DEBUGPRINT(0xd104);
1533 #endif
1534                 *p = *p + memneed;
1535                 gcrequiredmems[coren] = 0;
1536                 gcloads[gctopcore] += memneed;
1537                 *remain = *remain - memneed;
1538         } else {
1539 #ifdef DEBUG
1540                 BAMBOO_DEBUGPRINT(0xd105);
1541 #endif
1542                 // next available block
1543                 *p = *p + *remain;
1544                 gcfilledblocks[gctopcore] += 1;
1545                 int newbase = 0;
1546                 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1547                 gcloads[gctopcore] = newbase;
1548                 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1549                 gcstopblock[gctopcore]++;
1550                 gctopcore = NEXTTOPCORE(gctopblock);
1551                 gctopblock++;
1552                 *numblocks = gcstopblock[gctopcore];
1553                 *p = gcloads[gctopcore];
1554                 BLOCKINDEX(*p, &b);
1555                 *remain=(b<NUMCORES4GC)?
1556                         ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1557            :((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1558 #ifdef DEBUG
1559                 BAMBOO_DEBUGPRINT(0xd106);
1560                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1561                 BAMBOO_DEBUGPRINT_REG(*p);
1562                 BAMBOO_DEBUGPRINT_REG(b);
1563                 BAMBOO_DEBUGPRINT_REG(*remain);
1564 #endif
1565         } // if(memneed < remain)
1566         gcmovepending--;
1567 } // void compact2Heaptophelper(int, int*, int*, int*)
1568
1569 inline void compact2Heaptop() {
1570         // no cores with spare mem and some cores are blocked with pending move
1571         // find the current heap top and make them move to the heap top
1572         int p;
1573         int numblocks = gcfilledblocks[gctopcore];
1574         //BASEPTR(gctopcore, numblocks, &p);
1575         p = gcloads[gctopcore];
1576         int b;
1577         BLOCKINDEX(p, &b);
1578         int remain = (b<NUMCORES4GC)?
1579                 ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1580          :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1581         // check if the top core finishes
1582         if(gccorestatus[gctopcore] != 0) {
1583 #ifdef DEBUG
1584                 BAMBOO_DEBUGPRINT(0xd101);
1585                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1586 #endif
1587                 // let the top core finishes its own work first
1588                 compact2Heaptophelper(gctopcore, &p, &numblocks, &remain);
1589                 return;
1590         }
1591
1592 #ifdef DEBUG
1593         BAMBOO_DEBUGPRINT(0xd102);
1594         BAMBOO_DEBUGPRINT_REG(gctopcore);
1595         BAMBOO_DEBUGPRINT_REG(p);
1596         BAMBOO_DEBUGPRINT_REG(b);
1597         BAMBOO_DEBUGPRINT_REG(remain);
1598 #endif
1599         /*if((gctopcore == STARTUPCORE) && (b == 0)) {
1600                 remain -= gcreservedsb*BAMBOO_SMEM_SIZE;
1601                 p += gcreservedsb*BAMBOO_SMEM_SIZE;
1602         }*/
1603         for(int i = 0; i < NUMCORES4GC; i++) {
1604                 BAMBOO_START_CRITICAL_SECTION();
1605                 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1606 #ifdef DEBUG
1607                         BAMBOO_DEBUGPRINT(0xd103);
1608 #endif
1609                         compact2Heaptophelper(i, &p, &numblocks, &remain);
1610                         if(gccorestatus[gctopcore] != 0) {
1611 #ifdef DEBUG
1612                                 BAMBOO_DEBUGPRINT(0xd101);
1613                                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1614 #endif
1615                                 // the top core is not free now
1616                                 return;
1617                         }
1618                 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1619                 BAMBOO_CLOSE_CRITICAL_SECTION();
1620         } // for(i = 0; i < NUMCORES4GC; i++)
1621 #ifdef DEBUG
1622         BAMBOO_DEBUGPRINT(0xd106);
1623 #endif
1624 } // void compact2Heaptop()
1625
1626 inline void resolvePendingMoveRequest() {
1627 #ifdef DEBUG
1628         BAMBOO_DEBUGPRINT(0xeb01);
1629 #endif
1630 #ifdef DEBUG
1631                 BAMBOO_DEBUGPRINT(0xeeee);
1632                 for(int k = 0; k < NUMCORES4GC; k++) {
1633                         BAMBOO_DEBUGPRINT(0xf000+k);
1634                         BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1635                         BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1636                         BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1637                         BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1638                 }
1639                 BAMBOO_DEBUGPRINT(0xffff);
1640 #endif
1641         int i;
1642         int j;
1643         bool nosparemem = true;
1644         bool haspending = false;
1645         bool hasrunning = false;
1646         bool noblock = false;
1647         int dstcore = 0; // the core who need spare mem
1648         int sourcecore = 0; // the core who has spare mem
1649         for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC);) {
1650                 if(nosparemem) {
1651                         // check if there are cores with spare mem
1652                         if(gccorestatus[i] == 0) {
1653                                 // finished working, check if it still have spare mem
1654                                 if(gcfilledblocks[i] < gcstopblock[i]) {
1655                                         // still have spare mem
1656                                         nosparemem = false;
1657                                         sourcecore = i;
1658                                 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1659                         }
1660                         i++;
1661                 } // if(nosparemem)
1662                 if(!haspending) {
1663                         if(gccorestatus[j] != 0) {
1664                                 // not finished, check if it has pending move requests
1665                                 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1666                                         dstcore = j;
1667                                         haspending = true;
1668                                 } else {
1669                                         hasrunning = true;
1670                                 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1671                         } // if(gccorestatus[i] == 0) else ...
1672                         j++;
1673                 } // if(!haspending)
1674                 if(!nosparemem && haspending) {
1675                         // find match
1676                         int tomove = 0;
1677                         int startaddr = 0;
1678                         BAMBOO_START_CRITICAL_SECTION();
1679                         gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore, 
1680                                                                                gcrequiredmems[dstcore], 
1681                                                                                                                                                                                            &tomove, 
1682                                                                                                                                                                                            &startaddr);
1683                         BAMBOO_CLOSE_CRITICAL_SECTION();
1684 #ifdef DEBUG
1685                         BAMBOO_DEBUGPRINT(0xeb02);
1686                         BAMBOO_DEBUGPRINT_REG(sourcecore);
1687                         BAMBOO_DEBUGPRINT_REG(dstcore);
1688                         BAMBOO_DEBUGPRINT_REG(startaddr);
1689                         BAMBOO_DEBUGPRINT_REG(tomove);
1690 #endif
1691                         if(STARTUPCORE == dstcore) {
1692 #ifdef DEBUG
1693                                 BAMBOO_DEBUGPRINT(0xeb03);
1694 #endif
1695                                 gcdstcore = sourcecore;
1696                                 gctomove = true;
1697                                 gcmovestartaddr = startaddr;
1698                                 gcblock2fill = tomove;
1699                         } else {
1700 #ifdef DEBUG
1701                                 BAMBOO_DEBUGPRINT(0xeb04);
1702 #endif
1703                                 send_msg_4(dstcore, GCMOVESTART, sourcecore, 
1704                                                 startaddr, tomove, false);
1705                         }
1706                         gcmovepending--;
1707                         nosparemem = true;
1708                         haspending = false;
1709                         noblock = true;
1710                 }
1711         } // for(i = 0; i < NUMCORES4GC; i++)
1712 #ifdef DEBUG
1713         BAMBOO_DEBUGPRINT(0xcccc);
1714         BAMBOO_DEBUGPRINT_REG(hasrunning);
1715         BAMBOO_DEBUGPRINT_REG(haspending);
1716         BAMBOO_DEBUGPRINT_REG(noblock);
1717 #endif
1718
1719         if(!hasrunning && !noblock) {
1720                 gcphase = SUBTLECOMPACTPHASE;
1721                 compact2Heaptop();
1722         }
1723
1724 } // void resovePendingMoveRequest()
1725
1726 struct moveHelper {
1727         int numblocks; // block num for heap
1728         INTPTR base; // base virtual address of current heap block
1729         INTPTR ptr; // virtual address of current heap top
1730         int offset; // offset in current heap block
1731         int blockbase; // virtual address of current small block to check
1732         int blockbound; // bound virtual address of current small blcok
1733         int sblockindex; // index of the small blocks
1734         int top; // real size of current heap block to check
1735         int bound; // bound size of current heap block to check
1736 }; // struct moveHelper
1737
1738 // if out of boundary of valid shared memory, return false, else return true
1739 inline bool nextSBlock(struct moveHelper * orig) {
1740         orig->blockbase = orig->blockbound;
1741         bool sbchanged = false;
1742 #ifdef DEBUG 
1743         BAMBOO_DEBUGPRINT(0xecc0);
1744         BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1745         BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1746         BAMBOO_DEBUGPRINT_REG(orig->bound);
1747         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1748 #endif
1749 outernextSBlock:
1750         // check if across a big block
1751         if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound) 
1752                         || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0) 
1753                         || ((*((int*)orig->blockbase))==0)) {
1754 innernextSBlock:
1755                 // end of current heap block, jump to next one
1756                 orig->numblocks++;
1757 #ifdef DEBUG 
1758                 BAMBOO_DEBUGPRINT(0xecc1);
1759                 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1760 #endif
1761                 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1762 #ifdef DEBUG 
1763                 BAMBOO_DEBUGPRINT(orig->base);
1764 #endif
1765                 if(orig->base >= BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE) {
1766                         // out of boundary
1767                         orig->ptr = orig->base; // set current ptr to out of boundary too
1768                         return false;
1769                 }
1770                 orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1771                 orig->blockbase = orig->base;
1772                 orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
1773                 sbchanged = true;
1774         } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
1775                 orig->sblockindex += 1;
1776                 sbchanged = true;
1777         } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1778
1779         // check if this sblock should be omitted or have special start point
1780         if(gcsbstarttbl[orig->sblockindex] == -1) {
1781                 // goto next sblock
1782 #ifdef DEBUG
1783                 BAMBOO_DEBUGPRINT(0xecc2);
1784 #endif
1785                 orig->sblockindex += 1;
1786                 orig->blockbase += BAMBOO_SMEM_SIZE;
1787                 goto outernextSBlock;
1788         } else if((gcsbstarttbl[orig->sblockindex] != 0) 
1789                         && (sbchanged)) {
1790                 // the first time to access this SBlock
1791 #ifdef DEBUG
1792                 BAMBOO_DEBUGPRINT(0xecc3);
1793 #endif
1794                 // not start from the very beginning
1795                 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1796         } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1797
1798         // setup information for this sblock
1799         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1800         orig->offset = BAMBOO_CACHE_LINE_SIZE;
1801         orig->ptr = orig->blockbase + orig->offset;
1802 #ifdef DEBUG
1803         BAMBOO_DEBUGPRINT(0xecc4);
1804         BAMBOO_DEBUGPRINT_REG(orig->base);
1805         BAMBOO_DEBUGPRINT_REG(orig->bound);
1806         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1807         BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1808         BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1809         BAMBOO_DEBUGPRINT_REG(orig->offset);
1810 #endif
1811         if(orig->ptr >= orig->bound) {
1812                 // met a lobj, move to next block
1813                 goto innernextSBlock;
1814         }
1815
1816         return true;
1817 } // bool nextSBlock(struct moveHelper * orig) 
1818
1819 // return false if there are no available data to compact
1820 inline bool initOrig_Dst(struct moveHelper * orig, 
1821                                      struct moveHelper * to) {
1822         // init the dst ptr
1823         to->numblocks = 0;
1824         to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1825         to->bound = BAMBOO_SMEM_SIZE_L;
1826         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1827
1828 #ifdef DEBUG
1829         BAMBOO_DEBUGPRINT(0xef01);
1830         BAMBOO_DEBUGPRINT_REG(to->base);
1831 #endif
1832         to->ptr = to->base + to->offset;
1833
1834         // init the orig ptr
1835         orig->numblocks = 0;
1836         orig->base = to->base;
1837         orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
1838         orig->blockbase = orig->base;
1839         orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
1840 #ifdef DEBUG 
1841         BAMBOO_DEBUGPRINT(0xef02);
1842         BAMBOO_DEBUGPRINT_REG(orig->base);
1843         BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1844         BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1845         BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1846 #endif
1847
1848         if(gcsbstarttbl[orig->sblockindex] == -1) {
1849 #ifdef DEBUG 
1850                 BAMBOO_DEBUGPRINT(0xef03);
1851 #endif
1852                 // goto next sblock
1853                 orig->blockbound = 
1854                         BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1855                 return nextSBlock(orig);
1856         } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1857 #ifdef DEBUG 
1858                 BAMBOO_DEBUGPRINT(0xef04);
1859 #endif
1860                 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1861         }
1862 #ifdef DEBUG 
1863         BAMBOO_DEBUGPRINT(0xef05);
1864 #endif
1865         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1866         orig->offset = BAMBOO_CACHE_LINE_SIZE;
1867         orig->ptr = orig->blockbase + orig->offset;
1868 #ifdef DEBUG
1869         BAMBOO_DEBUGPRINT(0xef06);
1870         BAMBOO_DEBUGPRINT_REG(orig->base);
1871 #endif
1872         return true;
1873 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to) 
1874
1875 inline void nextBlock(struct moveHelper * to) {
1876         to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1877         to->bound += BAMBOO_SMEM_SIZE;
1878         to->numblocks++;
1879         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1880         to->offset = BAMBOO_CACHE_LINE_SIZE;
1881         to->ptr = to->base + to->offset;
1882 } // void nextBlock(struct moveHelper * to)
1883
1884 // endaddr does not contain spaces for headers
1885 inline bool moveobj(struct moveHelper * orig, 
1886                                 struct moveHelper * to, 
1887                                                         int stopblock) {
1888         if(stopblock == 0) {
1889                 return true;
1890         }
1891
1892 #ifdef DEBUG
1893         BAMBOO_DEBUGPRINT(0xe201);
1894         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1895         BAMBOO_DEBUGPRINT_REG(to->ptr);
1896 #endif
1897
1898         int type = 0;
1899         int size = 0;
1900         int mark = 0;
1901         int isize = 0;
1902 innermoveobj:
1903         while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1904                 orig->ptr = (int*)(orig->ptr) + 1;
1905         }
1906         if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
1907                 if(!nextSBlock(orig)) {
1908                         // finished, no more data
1909                         return true;
1910                 }
1911                 goto innermoveobj;
1912         }
1913 #ifdef DEBUG
1914         BAMBOO_DEBUGPRINT(0xe202);
1915         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1916         BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
1917 #endif
1918         // check the obj's type, size and mark flag
1919         type = ((int *)(orig->ptr))[0];
1920         size = 0;
1921         if(type == 0) {
1922                 // end of this block, go to next one
1923                 if(!nextSBlock(orig)) {
1924                         // finished, no more data
1925                         return true;
1926                 }
1927                 goto innermoveobj;
1928         } else if(type < NUMCLASSES) {
1929                 // a normal object
1930                 size = classsize[type];
1931         } else {        
1932                 // an array 
1933                 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
1934                 int elementsize=classsize[type];
1935                 int length=ao->___length___; 
1936                 size=sizeof(struct ArrayObject)+length*elementsize;
1937         }
1938         mark = ((int *)(orig->ptr))[6];
1939 #ifdef DEBUG
1940         BAMBOO_DEBUGPRINT(0xe203);
1941         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1942         BAMBOO_DEBUGPRINT_REG(size);
1943 #endif
1944         ALIGNSIZE(size, &isize); // no matter is the obj marked or not
1945                                  // should be able to across it
1946         if(mark == MARKED) {
1947 #ifdef DEBUG
1948                 BAMBOO_DEBUGPRINT(0xe204);
1949 #endif
1950                 // marked obj, copy it to current heap top
1951                 // check to see if remaining space is enough
1952                 if(to->top + isize > to->bound) {
1953                         // fill 0 indicating the end of this block
1954                         memset(to->ptr,  '\0', to->bound - to->top);
1955                         // fill the header of this block and then go to next block
1956         to->offset += to->bound - to->top;
1957                         memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1958                         (*((int*)(to->base))) = to->offset;
1959                         nextBlock(to);
1960                         if(stopblock == to->numblocks) {
1961                                 // already fulfilled the block
1962                                 return true;
1963                         } // if(stopblock == to->numblocks)
1964                 } // if(to->top + isize > to->bound)
1965                 // set the mark field to 2, indicating that this obj has been moved 
1966                 // and need to be flushed
1967                 ((int *)(orig->ptr))[6] = COMPACTED;
1968                 if(to->ptr != orig->ptr) {
1969                         if((int)(orig->ptr) < (int)(to->ptr)+size) {
1970                           memmove(to->ptr, orig->ptr, size);
1971                         } else {
1972                                 memcpy(to->ptr, orig->ptr, size);
1973                         }
1974                         // fill the remaining space with -2
1975                         memset(to->ptr+size, -2, isize-size);
1976                 }
1977                 // store mapping info
1978                 BAMBOO_START_CRITICAL_SECTION();
1979                 //mgchashInsert_I(orig->ptr, to->ptr);
1980                 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr); 
1981                 //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
1982                 BAMBOO_CLOSE_CRITICAL_SECTION();
1983           //}
1984
1985 #ifdef DEBUG
1986                 BAMBOO_DEBUGPRINT(0xcdce);
1987                 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1988                 BAMBOO_DEBUGPRINT_REG(to->ptr);
1989 #endif
1990                 gccurr_heaptop -= isize;
1991                 to->ptr += isize;
1992                 to->offset += isize;
1993                 to->top += isize;
1994                 if(to->top == to->bound) {
1995                         // fill the header of this block and then go to next block
1996                         memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1997                         (*((int*)(to->base))) = to->offset;
1998                         nextBlock(to);
1999                 }
2000         } // if(mark == 1)
2001 #ifdef DEBUG
2002         BAMBOO_DEBUGPRINT(0xe205);
2003 #endif
2004         // move to next obj
2005         orig->ptr += size;
2006         
2007 #ifdef DEBUG
2008         BAMBOO_DEBUGPRINT_REG(isize);
2009         BAMBOO_DEBUGPRINT_REG(size);
2010         BAMBOO_DEBUGPRINT_REG(orig->ptr);
2011         BAMBOO_DEBUGPRINT_REG(orig->bound);
2012 #endif
2013         if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
2014 #ifdef DEBUG
2015         BAMBOO_DEBUGPRINT(0xe206);
2016 #endif
2017                 if(!nextSBlock(orig)) {
2018                         // finished, no more data
2019                         return true;
2020                 }
2021         }
2022 #ifdef DEBUG
2023         BAMBOO_DEBUGPRINT(0xe207);
2024         BAMBOO_DEBUGPRINT_REG(orig->ptr);
2025 #endif
2026         return false;
2027 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
2028
2029 // should be invoked with interrupt closed
2030 inline int assignSpareMem_I(int sourcecore,
2031                                         int * requiredmem,
2032                                                                                                           int * tomove,
2033                                                                                                           int * startaddr) {
2034         int b = 0;
2035         BLOCKINDEX(gcloads[sourcecore], &b);
2036         int boundptr = (b<NUMCORES4GC)?((b+1)*BAMBOO_SMEM_SIZE_L)
2037                 :(BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2038         int remain = boundptr - gcloads[sourcecore];
2039         int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2040         *startaddr = gcloads[sourcecore];
2041         *tomove = gcfilledblocks[sourcecore] + 1;
2042         if(memneed < remain) {
2043                 gcloads[sourcecore] += memneed;
2044                 return 0;
2045         } else {
2046                 // next available block
2047                 gcfilledblocks[sourcecore] += 1;
2048                 int newbase = 0;
2049                 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2050                 gcloads[sourcecore] = newbase;
2051                 return requiredmem-remain;
2052         }
2053 } // int assignSpareMem_I(int ,int * , int * , int * )
2054
2055 // should be invoked with interrupt closed
2056 inline bool gcfindSpareMem_I(int * startaddr,
2057                                          int * tomove,
2058                                                                                                    int * dstcore,
2059                                                                                                    int requiredmem,
2060                                                                                                    int requiredcore) {
2061         for(int k = 0; k < NUMCORES4GC; k++) {
2062                 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2063                         // check if this stopped core has enough mem
2064                         assignSpareMem_I(k, requiredmem, tomove, startaddr);
2065                         *dstcore = k;
2066                         return true;
2067                 }
2068         }
2069         // if can not find spare mem right now, hold the request
2070         gcrequiredmems[requiredcore] = requiredmem;
2071         gcmovepending++;
2072         return false;
2073 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
2074
2075 inline bool compacthelper(struct moveHelper * orig,
2076                                       struct moveHelper * to,
2077                                                                                                         int * filledblocks,
2078                                                                                                         int * heaptopptr,
2079                                                                                                         bool * localcompact) {
2080         // scan over all objs in this block, compact the marked objs 
2081         // loop stop when finishing either scanning all active objs or 
2082         // fulfilled the gcstopblock
2083 #ifdef DEBUG
2084         BAMBOO_DEBUGPRINT(0xe101);
2085         BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2086         BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2087 #endif
2088 innercompact:
2089         while(orig->ptr < gcmarkedptrbound) {
2090                 bool stop = moveobj(orig, to, gcblock2fill);
2091                 if(stop) {
2092                         break;
2093                 }
2094         } 
2095         // if no objs have been compact, do nothing, 
2096         // otherwise, fill the header of this block
2097         if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2098                 memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2099                 (*((int*)(to->base))) = to->offset;
2100         } else {
2101                 to->offset = 0;
2102                 to->ptr = to->base;
2103                 to->top -= BAMBOO_CACHE_LINE_SIZE;
2104         } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2105         if(*localcompact) {
2106                 *heaptopptr = to->ptr;
2107                 *filledblocks = to->numblocks;
2108         }
2109 #ifdef DEBUG
2110         BAMBOO_DEBUGPRINT(0xe102);
2111         BAMBOO_DEBUGPRINT_REG(orig->ptr);
2112         BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2113         BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2114         BAMBOO_DEBUGPRINT_REG(*filledblocks);
2115         BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2116 #endif
2117
2118         // send msgs to core coordinator indicating that the compact is finishing
2119         // send compact finish message to core coordinator
2120         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2121                 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2122                 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2123                 if(orig->ptr < gcmarkedptrbound) {
2124 #ifdef DEBUG
2125                         BAMBOO_DEBUGPRINT(0xe103);
2126 #endif
2127                         // ask for more mem
2128                         gctomove = false;
2129                         BAMBOO_START_CRITICAL_SECTION();
2130                         if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
2131                                                               gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2132 #ifdef DEBUG
2133                                 BAMBOO_DEBUGPRINT(0xe104);
2134 #endif
2135                                 gctomove = true;
2136                         } else {
2137                                 BAMBOO_CLOSE_CRITICAL_SECTION();
2138 #ifdef DEBUG
2139                                 BAMBOO_DEBUGPRINT(0xe105);
2140 #endif
2141                                 return false; 
2142                         }
2143                         BAMBOO_CLOSE_CRITICAL_SECTION();
2144                 } else {
2145 #ifdef DEBUG
2146                         BAMBOO_DEBUGPRINT(0xe106);
2147 #endif
2148                         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2149                         gctomove = false;
2150                         return true;
2151                 }
2152         } else {
2153                 if(orig->ptr < gcmarkedptrbound) {
2154 #ifdef DEBUG
2155                         BAMBOO_DEBUGPRINT(0xe107);
2156 #endif
2157                         // ask for more mem
2158                         gctomove = false;
2159                         send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
2160                                                *filledblocks, *heaptopptr, gccurr_heaptop, false);
2161                 } else {
2162 #ifdef DEBUG
2163                         BAMBOO_DEBUGPRINT(0xe108);
2164                         BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2165 #endif
2166                         // finish compacting
2167                         send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2168                                                *filledblocks, *heaptopptr, 0, false);
2169                 }
2170         } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2171
2172         if(orig->ptr < gcmarkedptrbound) {
2173 #ifdef DEBUG
2174                 BAMBOO_DEBUGPRINT(0xe109);
2175 #endif
2176                 // still have unpacked obj
2177                 while(true) {
2178                         if(gctomove) {
2179                                 break;
2180                         }
2181                 };
2182                 gctomove = false;
2183 #ifdef DEBUG
2184                 BAMBOO_DEBUGPRINT(0xe10a);
2185 #endif
2186
2187                 to->ptr = gcmovestartaddr;
2188                 to->numblocks = gcblock2fill - 1;
2189                 to->bound = (to->numblocks==0)?
2190                         BAMBOO_SMEM_SIZE_L:
2191                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2192                 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2193                 to->offset = to->ptr - to->base;
2194                 to->top = (to->numblocks==0)?
2195                         (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
2196                 to->base = to->ptr;
2197                 to->offset = BAMBOO_CACHE_LINE_SIZE;
2198                 to->ptr += to->offset; // for header
2199                 to->top += to->offset;
2200                 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2201                         *localcompact = true;
2202                 } else {
2203                         *localcompact = false;
2204                 }
2205                 goto innercompact;
2206         }
2207 #ifdef DEBUG
2208         BAMBOO_DEBUGPRINT(0xe10b);
2209 #endif
2210         return true;
2211 } // void compacthelper()
2212
2213 inline void compact() {
2214         if(COMPACTPHASE != gcphase) {
2215                 BAMBOO_EXIT(0xb102);
2216         }
2217
2218         // initialize pointers for comapcting
2219         struct moveHelper * orig = 
2220                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2221         struct moveHelper * to = 
2222                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2223
2224         if(!initOrig_Dst(orig, to)) {
2225                 // no available data to compact
2226                 // send compact finish msg to STARTUP core
2227 #ifdef DEBUG
2228                 BAMBOO_DEBUGPRINT(0xe001);
2229                 BAMBOO_DEBUGPRINT_REG(to->base);
2230 #endif
2231                 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2232                                        0, to->base, 0, false);
2233                 RUNFREE(orig);
2234                 RUNFREE(to);
2235                 return;
2236         }
2237
2238         int filledblocks = 0;
2239         INTPTR heaptopptr = 0;
2240         bool localcompact = true;
2241         compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2242         
2243         RUNFREE(orig);
2244         RUNFREE(to);
2245 } // compact()
2246
2247 inline void * flushObj(void * objptr) {
2248 #ifdef DEBUG
2249         BAMBOO_DEBUGPRINT(0xe401);
2250 #endif
2251         void * dstptr = NULL;
2252         if(ISSHAREDOBJ(objptr)) {
2253 #ifdef DEBUG
2254                 BAMBOO_DEBUGPRINT(0xe402);
2255                 BAMBOO_DEBUGPRINT_REG(objptr);
2256 #endif
2257                 // a shared obj ptr, change to new address
2258                 BAMBOO_START_CRITICAL_SECTION();
2259                 //dstptr = mgchashSearch(objptr);
2260                 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2261                 //MGCHashget(gcpointertbl, objptr, &dstptr);
2262                 BAMBOO_CLOSE_CRITICAL_SECTION();
2263 #ifdef DEBUG
2264                 BAMBOO_DEBUGPRINT_REG(dstptr);
2265 #endif
2266                 if(NULL == dstptr) {
2267 #ifdef DEBUG
2268                         BAMBOO_DEBUGPRINT(0xe403);
2269                         BAMBOO_DEBUGPRINT_REG(objptr);
2270                         BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2271 #endif
2272                         if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2273                                 // error! the obj is right on this core, but cannot find it
2274                                 BAMBOO_DEBUGPRINT_REG(objptr);
2275                                 BAMBOO_EXIT(0xb103);
2276                                 // assume that the obj has not been moved, use the original address
2277                                 //dstptr = objptr;
2278                         } else {
2279                         // send msg to host core for the mapping info
2280                         gcobj2map = (int)objptr;
2281                         gcismapped = false;
2282                         gcmappedobj = NULL;
2283                         send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
2284                                                                  BAMBOO_NUM_OF_CORE, false);
2285                         while(true) {
2286                                 if(gcismapped) {
2287                                         break;
2288                                 }
2289                         }
2290                         BAMBOO_START_CRITICAL_SECTION();
2291                         //dstptr = mgchashSearch(objptr);
2292                         RuntimeHashget(gcpointertbl, objptr, &dstptr);
2293                         //MGCHashget(gcpointertbl, objptr, &dstptr);
2294                         BAMBOO_CLOSE_CRITICAL_SECTION();
2295                         }
2296 #ifdef DEBUG
2297                         BAMBOO_DEBUGPRINT_REG(dstptr);
2298 #endif
2299                 }
2300         } else {
2301                 // not a shared obj, use the old address
2302                 dstptr = objptr;
2303         }// if(ISSHAREDOBJ(objptr))
2304 #ifdef DEBUG
2305         BAMBOO_DEBUGPRINT(0xe404);
2306 #endif
2307         return dstptr;
2308 } // void flushObj(void * objptr, void ** tochange)
2309
2310 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2311         int i,j;
2312         // flush current stack 
2313         while(stackptr!=NULL) {
2314                 for(i=0; i<stackptr->size; i++) {
2315                         if(stackptr->array[i] != NULL) {
2316                                 stackptr->array[i] = flushObj(stackptr->array[i]);
2317                         }
2318                 }
2319                 stackptr=stackptr->next;
2320         }
2321
2322         // flush objectsets
2323         if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2324                 for(i=0; i<NUMCLASSES; i++) {
2325                         struct parameterwrapper ** queues = 
2326                                 objectqueues[BAMBOO_NUM_OF_CORE][i];
2327                         int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2328                         for(j = 0; j < length; ++j) {
2329                                 struct parameterwrapper * parameter = queues[j];
2330                                 struct ObjectHash * set=parameter->objectset;
2331                                 struct ObjectNode * ptr=set->listhead;
2332                                 while(ptr!=NULL) {
2333                                         ptr->key = flushObj((void *)ptr->key);
2334                                         ptr=ptr->lnext;
2335                                 }
2336                                 ObjectHashrehash(set);
2337                         }
2338                 }
2339         }
2340
2341         // flush current task descriptor
2342         if(currtpd != NULL) {
2343                 for(i=0; i<currtpd->numParameters; i++) {
2344                         currtpd->parameterArray[i] = flushObj(currtpd->parameterArray[i]);
2345                 }
2346         }
2347
2348         // flush active tasks
2349         if(activetasks != NULL) {
2350                 struct genpointerlist * ptr=activetasks->list;
2351                 while(ptr!=NULL) {
2352                         struct taskparamdescriptor *tpd=ptr->src;
2353                         int i;
2354                         for(i=0; i<tpd->numParameters; i++) {
2355                                 tpd->parameterArray[i] = flushObj(tpd->parameterArray[i]);
2356                         }
2357                         ptr=ptr->inext;
2358                 }
2359                 genrehash(activetasks);
2360         }
2361
2362         // flush cached transferred obj
2363         struct QueueItem * tmpobjptr =  getHead(&objqueue);
2364         while(tmpobjptr != NULL) {
2365                 struct transObjInfo * objInfo = 
2366                         (struct transObjInfo *)(tmpobjptr->objectptr); 
2367                 objInfo->objptr = flushObj(objInfo->objptr);
2368                 tmpobjptr = getNextQueueItem(tmpobjptr);
2369         }
2370
2371         // flush cached objs to be transferred
2372         struct QueueItem * item = getHead(totransobjqueue);
2373         while(item != NULL) {
2374                 struct transObjInfo * totransobj = 
2375                         (struct transObjInfo *)(item->objectptr);
2376                 totransobj->objptr = flushObj(totransobj->objptr);
2377                 item = getNextQueueItem(item);
2378         } // while(item != NULL)
2379
2380         // enqueue lock related info
2381         for(i = 0; i < runtime_locklen; ++i) {
2382           runtime_locks[i].redirectlock = 
2383                         (int)flushObj(runtime_locks[i].redirectlock);
2384                 if(runtime_locks[i].value != NULL) {
2385                   runtime_locks[i].value = (int)flushObj(runtime_locks[i].value);
2386           }
2387         }
2388
2389 } // void flushRuntimeObj(struct garbagelist * stackptr)
2390
2391 inline void flush(struct garbagelist * stackptr) {
2392         flushRuntimeObj(stackptr);
2393         
2394         while(gc_moreItems()) {
2395 #ifdef DEBUG
2396                 BAMBOO_DEBUGPRINT(0xe301);
2397 #endif
2398                 void * ptr = gc_dequeue();
2399                 if(ISSHAREDOBJ(ptr)) {
2400                 void * tptr = flushObj(ptr);
2401 #ifdef DEBUG
2402                 BAMBOO_DEBUGPRINT(0xe302);
2403                 BAMBOO_DEBUGPRINT_REG(ptr);
2404                 BAMBOO_DEBUGPRINT_REG(tptr);
2405                 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2406 #endif
2407                 if(tptr != NULL) {
2408                         ptr = tptr;
2409                 }
2410                 }
2411                 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2412                         int type = ((int *)(ptr))[0];
2413                         // scan all pointers in ptr
2414                         unsigned INTPTR * pointer;
2415                         pointer=pointerarray[type];
2416 #ifdef DEBUG
2417                         BAMBOO_DEBUGPRINT(0xe303);
2418                         BAMBOO_DEBUGPRINT_REG(pointer);
2419 #endif
2420                         if (pointer==0) {
2421                                 /* Array of primitives */
2422                                 /* Do nothing */
2423                         } else if (((INTPTR)pointer)==1) {
2424 #ifdef DEBUG
2425                                 BAMBOO_DEBUGPRINT(0xe304);
2426 #endif
2427                                 /* Array of pointers */
2428                                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2429                                 int length=ao->___length___;
2430                                 int j;
2431                                 for(j=0; j<length; j++) {
2432 #ifdef DEBUG
2433                                         BAMBOO_DEBUGPRINT(0xe305);
2434 #endif
2435                                         void *objptr=
2436                                                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2437 #ifdef DEBUG
2438                                         BAMBOO_DEBUGPRINT_REG(objptr);
2439 #endif
2440                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
2441                                                 flushObj(objptr);
2442                                 }
2443                         } else {
2444 #ifdef DEBUG
2445                                 BAMBOO_DEBUGPRINT(0xe306);
2446 #endif
2447                                 INTPTR size=pointer[0];
2448                                 int i;
2449                                 for(i=1; i<=size; i++) {
2450 #ifdef DEBUG
2451                                         BAMBOO_DEBUGPRINT(0xe307);
2452 #endif
2453                                         unsigned int offset=pointer[i];
2454                                         void * objptr=*((void **)(((char *)ptr)+offset));
2455
2456 #ifdef DEBUG
2457                                         BAMBOO_DEBUGPRINT_REG(objptr);
2458 #endif
2459                                         *((void **)(((char *)ptr)+offset)) = flushObj(objptr);
2460                                 } // for(i=1; i<=size; i++) 
2461                         } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2462                         // restore the mark field, indicating that this obj has been flushed
2463                         if(ISSHAREDOBJ(ptr)) {
2464                                 ((int *)(ptr))[6] = INIT;
2465                         }
2466                 } // if(((int *)(ptr))[6] == COMPACTED)
2467         } // while(gc_moreItems())
2468 #ifdef DEBUG
2469         BAMBOO_DEBUGPRINT(0xe308);
2470 #endif
2471
2472         // TODO bug here: the startup core contains all lobjs' info, it will 
2473         // redundantly flush the lobjs found in the other cores.
2474         // flush lobjs
2475         while(gc_lobjmoreItems()) {
2476 #ifdef DEBUG
2477                 BAMBOO_DEBUGPRINT(0xe309);
2478 #endif
2479                 void * ptr = gc_lobjdequeue(NULL, NULL);
2480                 //if(ISSHAREDOBJ(ptr)) {
2481                 void * tptr = flushObj(ptr);
2482 #ifdef DEBUG
2483                 BAMBOO_DEBUGPRINT(0xe30a);
2484                 BAMBOO_DEBUGPRINT_REG(ptr);
2485                 BAMBOO_DEBUGPRINT_REG(tptr);
2486                 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2487 #endif
2488                 if(tptr != NULL) {
2489                         ptr = tptr;
2490                 }
2491                 //}
2492                 if(/*(!ISSHAREDOBJ(ptr)) || */(((int *)(ptr))[6] == COMPACTED)) {
2493                         int type = ((int *)(ptr))[0];
2494                         // scan all pointers in ptr
2495                         unsigned INTPTR * pointer;
2496                         pointer=pointerarray[type];
2497 #ifdef DEBUG
2498                         BAMBOO_DEBUGPRINT(0xe30b);
2499                         BAMBOO_DEBUGPRINT_REG(pointer);
2500 #endif
2501                         if (pointer==0) {
2502                                 /* Array of primitives */
2503                                 /* Do nothing */
2504                         } else if (((INTPTR)pointer)==1) {
2505 #ifdef DEBUG
2506                                 BAMBOO_DEBUGPRINT(0xe30c);
2507 #endif
2508                                 /* Array of pointers */
2509                                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2510                                 int length=ao->___length___;
2511                                 int j;
2512                                 for(j=0; j<length; j++) {
2513 #ifdef DEBUG
2514                                         BAMBOO_DEBUGPRINT(0xe30d);
2515 #endif
2516                                         void *objptr=
2517                                                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2518 #ifdef DEBUG
2519                                         BAMBOO_DEBUGPRINT_REG(objptr);
2520 #endif
2521                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
2522                                                 flushObj(objptr);
2523                                 }
2524                         } else {
2525 #ifdef DEBUG
2526                                 BAMBOO_DEBUGPRINT(0xe30e);
2527 #endif
2528                                 INTPTR size=pointer[0];
2529                                 int i;
2530                                 for(i=1; i<=size; i++) {
2531 #ifdef DEBUG
2532                                         BAMBOO_DEBUGPRINT(0xe30f);
2533 #endif
2534                                         unsigned int offset=pointer[i];
2535                                         void * objptr=*((void **)(((char *)ptr)+offset));
2536
2537 #ifdef DEBUG
2538                                         BAMBOO_DEBUGPRINT_REG(objptr);
2539 #endif
2540                                         *((void **)(((char *)ptr)+offset)) = flushObj(objptr);
2541                                 } // for(i=1; i<=size; i++) 
2542                         } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2543                         // restore the mark field, indicating that this obj has been flushed
2544                         //if(ISSHAREDOBJ(ptr)) {
2545                                 ((int *)(ptr))[6] = INIT;
2546                         //}
2547                 } // if(((int *)(ptr))[6] == COMPACTED)
2548         } // while(gc_lobjmoreItems())
2549 #ifdef DEBUG
2550         BAMBOO_DEBUGPRINT(0xe310);
2551 #endif
2552
2553         // send flush finish message to core coordinator
2554         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2555                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2556         } else {
2557                 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2558         }
2559 #ifdef DEBUG
2560         BAMBOO_DEBUGPRINT(0xe311);
2561 #endif
2562 } // flush()
2563
2564 inline void gc_collect(struct garbagelist * stackptr) {
2565         // core collector routine
2566         while(true) {
2567                 //BAMBOO_START_CRITICAL_SECTION();
2568                 if(INITPHASE == gcphase) {
2569                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2570                         break;
2571                 }
2572                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2573         }
2574 #ifdef RAWPATH // TODO GC_DEBUG
2575         tprintf("Do initGC\n");
2576 #endif
2577         initGC();
2578         //send init finish msg to core coordinator
2579         send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
2580         while(true) {
2581                 //BAMBOO_START_CRITICAL_SECTION();
2582                 if(MARKPHASE == gcphase) {
2583                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2584                         break;
2585                 }
2586                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2587         }
2588 #ifdef RAWPATH // TODO GC_DEBUG
2589         tprintf("Start mark phase\n");
2590 #endif
2591         mark(true, stackptr);
2592 #ifdef RAWPATH // TODO GC_DEBUG
2593         tprintf("Finish mark phase, start compact phase\n");
2594 #endif
2595         compact();
2596 #ifdef RAWPATH // TODO GC_DEBUG
2597         tprintf("Finish compact phase\n");
2598 #endif
2599         while(true) {
2600                 //BAMBOO_START_CRITICAL_SECTION();
2601                 if(FLUSHPHASE == gcphase) {
2602                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2603                         break;
2604                 }
2605                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2606         }
2607 #ifdef RAWPATH // TODO GC_DEBUG
2608         tprintf("Start flush phase\n");
2609 #endif
2610         flush(stackptr);
2611 #ifdef RAWPATH // TODO GC_DEBUG
2612         tprintf("Finish flush phase\n");
2613 #endif
2614
2615         while(true) {
2616                 //BAMBOO_START_CRITICAL_SECTION();
2617                 if(FINISHPHASE == gcphase) {
2618                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2619                         break;
2620                 }
2621                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2622         }
2623 #ifdef RAWPATH // TODO GC_DEBUG
2624         tprintf("Finish gc!\n");
2625 #endif
2626 } // void gc_collect(struct garbagelist * stackptr)
2627
2628 inline void gc(struct garbagelist * stackptr) {
2629         // check if do gc
2630         if(!gcflag) {
2631                 gcprocessing = false;
2632                 return;
2633         }
2634
2635         // core coordinator routine
2636         if(0 == BAMBOO_NUM_OF_CORE) {
2637 #ifdef GC_DEBUG
2638         tprintf("Check if can do gc or not\n");
2639 #endif
2640                 if(!preGC()) {
2641                         // not ready to do gc
2642                         gcflag = true;
2643                         return;
2644                 }
2645
2646 #ifdef RAWPATH // TODO GC_DEBUG
2647                 tprintf("start gc! \n");
2648                 //dumpSMem();
2649 #endif
2650                 gcprocessing = true;
2651                 int i = 0;
2652                 waitconfirm = false;
2653                 waitconfirm = 0;
2654                 gcphase = INITPHASE;
2655                 for(i = 1; i < NUMCORES4GC; i++) {
2656                         // send GC init messages to all cores
2657                         send_msg_1(i, GCSTARTINIT, false);
2658                 }
2659                 bool isfirst = true;
2660                 bool allStall = false;
2661
2662                 initGC();
2663 #ifdef RAWPATH // TODO GC_DEBUG
2664                 tprintf("Check core status \n");
2665 #endif
2666
2667                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2668                 while(true) {
2669                         BAMBOO_START_CRITICAL_SECTION();
2670                         if(gc_checkCoreStatus()) {
2671                                 BAMBOO_CLOSE_CRITICAL_SECTION();
2672                                 break;
2673                         }
2674                         BAMBOO_CLOSE_CRITICAL_SECTION();
2675                 }
2676 #ifdef RAWPATH // TODO GC_DEBUG
2677                 tprintf("Start mark phase \n");
2678 #endif
2679                 // all cores have finished compacting
2680                 // restore the gcstatus of all cores
2681                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2682                 for(i = 1; i < NUMCORES4GC; ++i) {
2683                         gccorestatus[i] = 1;
2684                         // send GC start messages to all cores
2685                         send_msg_1(i, GCSTART, false);
2686                 }
2687
2688                 gcphase = MARKPHASE;
2689     // mark phase
2690                 while(MARKPHASE == gcphase) {
2691                         mark(isfirst, stackptr);
2692                         if(isfirst) {
2693                                 isfirst = false;
2694                         }
2695
2696                         // check gcstatus
2697                         checkMarkStatue(); 
2698                 }  // while(MARKPHASE == gcphase)
2699                 // send msgs to all cores requiring large objs info
2700                 numconfirm = NUMCORES4GC - 1;
2701                 for(i = 1; i < NUMCORES4GC; ++i) {
2702                         send_msg_1(i, GCLOBJREQUEST, false);
2703                 }
2704                 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
2705                 while(true) {
2706                         if(numconfirm==0) {
2707                                 break;
2708                         }
2709                 } // wait for responses
2710                 // check the heaptop
2711                 if(gcheaptop < gcmarkedptrbound) {
2712                         gcheaptop = gcmarkedptrbound;
2713                 }
2714 #ifdef RAWPATH // TODO GC_DEBUG
2715                 tprintf("prepare to cache large objs \n");
2716                 //dumpSMem();
2717 #endif
2718                 // cache all large objs
2719                 if(!cacheLObjs()) {
2720                         // no enough space to cache large objs
2721                         BAMBOO_EXIT(0xb104);
2722                 }
2723                 // predict number of blocks to fill for each core
2724                 int tmpheaptop = 0;
2725                 int numpbc = loadbalance(&tmpheaptop);
2726                 // TODO
2727                 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
2728 #ifdef RAWPATH // TODO GC_DEBUG
2729                 tprintf("mark phase finished \n");
2730                 //dumpSMem();
2731 #endif
2732                 //int tmptopptr = 0;
2733                 //BASEPTR(gctopcore, 0, &tmptopptr);
2734                 // TODO
2735                 //tmptopptr = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
2736                 tmpheaptop = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
2737 #ifdef DEBUG
2738                 BAMBOO_DEBUGPRINT(0xabab);
2739                 BAMBOO_DEBUGPRINT_REG(tmptopptr);
2740 #endif
2741                 for(i = 0; i < NUMCORES4GC; ++i) {
2742                         int tmpcoreptr = 0;
2743                         BASEPTR(i, numpbc, &tmpcoreptr);
2744                         //send start compact messages to all cores
2745                         //TODO bug here, do not know if the direction is positive or negtive?
2746                         if (tmpcoreptr < tmpheaptop/*tmptopptr*/) {
2747                                 gcstopblock[i] = numpbc + 1;
2748                                 if(i != STARTUPCORE) {
2749                                         send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false); 
2750                                 } else {
2751                                         gcblock2fill = numpbc+1;
2752                                 } // if(i != STARTUPCORE)
2753                         } else {
2754                                 gcstopblock[i] = numpbc;
2755                                 if(i != STARTUPCORE) {
2756                                         send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
2757                                 } else {
2758                                         gcblock2fill = numpbc;
2759                                 } // if(i != STARTUPCORE)
2760                         }
2761 #ifdef DEBUG
2762                         BAMBOO_DEBUGPRINT(0xf000+i);
2763                         BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
2764                         BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
2765 #endif
2766                         // init some data strutures for compact phase
2767                         gcloads[i] = 0;
2768                         gcfilledblocks[i] = 0;
2769                         gcrequiredmems[i] = 0;
2770                 }
2771
2772                 // compact phase
2773                 bool finalcompact = false;
2774                 // initialize pointers for comapcting
2775                 struct moveHelper * orig = 
2776                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2777                 struct moveHelper * to = 
2778                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2779                 initOrig_Dst(orig, to);
2780                 int filledblocks = 0;
2781                 INTPTR heaptopptr = 0;
2782                 bool finishcompact = false;
2783                 bool iscontinue = true;
2784                 bool localcompact = true;
2785                 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
2786                         if((!finishcompact) && iscontinue) {
2787 #ifdef DEBUG
2788                                 BAMBOO_DEBUGPRINT(0xe001);
2789                                 BAMBOO_DEBUGPRINT_REG(numpbc);
2790                                 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2791 #endif
2792                                 finishcompact = compacthelper(orig, to, &filledblocks, 
2793                                                                           &heaptopptr, &localcompact);
2794 #ifdef DEBUG
2795                                 BAMBOO_DEBUGPRINT(0xe002);
2796                                 BAMBOO_DEBUGPRINT_REG(finishcompact);
2797                                 BAMBOO_DEBUGPRINT_REG(gctomove);
2798                                 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
2799                                 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
2800                                 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
2801 #endif
2802                         }
2803
2804                         if(gc_checkCoreStatus()) {
2805                                 // all cores have finished compacting
2806                                 // restore the gcstatus of all cores
2807                                 for(i = 0; i < NUMCORES4GC; ++i) {
2808                                         gccorestatus[i] = 1;
2809                                 }
2810                                 break;
2811                         } else {
2812                                 // check if there are spare mem for pending move requires
2813                                 if(COMPACTPHASE == gcphase) {
2814 #ifdef DEBUG
2815                                         BAMBOO_DEBUGPRINT(0xe003);
2816 #endif
2817                                         resolvePendingMoveRequest();
2818 #ifdef DEBUG
2819                                         BAMBOO_DEBUGPRINT_REG(gctomove);
2820 #endif
2821                                 } else {
2822 #ifdef DEBUG
2823                                         BAMBOO_DEBUGPRINT(0xe004);
2824 #endif
2825                                         compact2Heaptop();
2826                                 }
2827                         } // if(gc_checkCoreStatus()) else ...
2828
2829                         if(gctomove) {
2830 #ifdef DEBUG
2831                                 BAMBOO_DEBUGPRINT(0xe005);
2832                                 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
2833                                 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2834                                 BAMBOO_DEBUGPRINT_REG(gctomove);
2835 #endif
2836                                 to->ptr = gcmovestartaddr;
2837                                 to->numblocks = gcblock2fill - 1;
2838                                 to->bound = (to->numblocks==0)?
2839                                         BAMBOO_SMEM_SIZE_L:
2840                                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2841                                 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2842                                 to->offset = to->ptr - to->base;
2843                                 to->top = (to->numblocks==0)?
2844                                         (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
2845                                 to->base = to->ptr;
2846                                 to->offset = BAMBOO_CACHE_LINE_SIZE;
2847                                 to->ptr += to->offset; // for header
2848                                 to->top += to->offset;
2849                                 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2850                                         localcompact = true;
2851                                 } else {
2852                                         localcompact = false;
2853                                 }
2854                                 gctomove = false;
2855                                 iscontinue = true;
2856                         } else if(!finishcompact) {
2857                                 // still pending
2858                                 iscontinue = false;
2859                         } // if(gctomove)
2860
2861                 } // while(COMPACTPHASE == gcphase) 
2862         
2863 #ifdef RAWPATH // TODO GC_DEBUG
2864                 tprintf("prepare to move large objs \n");
2865                 //dumpSMem();
2866 #endif
2867                 // move largeObjs
2868                 moveLObjs();
2869 #ifdef RAWPATH // TODO GC_DEBUG
2870                 tprintf("compact phase finished \n");
2871                 //dumpSMem();
2872 #endif
2873                 RUNFREE(orig);
2874                 RUNFREE(to);
2875                 orig = to = NULL;
2876
2877                 gcphase = FLUSHPHASE;
2878                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2879                 for(i = 1; i < NUMCORES4GC; ++i) {
2880                         // send start flush messages to all cores
2881                         gccorestatus[i] = 1;
2882                         send_msg_1(i, GCSTARTFLUSH, false);
2883                 }
2884
2885 #ifdef RAWPATH // TODO GC_DEBUG
2886                 tprintf("Start flush phase \n");
2887 #endif
2888                 // flush phase
2889                 flush(stackptr);
2890                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2891                 while(FLUSHPHASE == gcphase) {
2892                         // check the status of all cores
2893                         if(gc_checkCoreStatus()) {
2894                                 break;
2895                         }
2896                 } // while(FLUSHPHASE == gcphase)
2897                 gcphase = FINISHPHASE;
2898
2899                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2900                 for(i = 1; i < NUMCORES4GC; ++i) {
2901                         // send gc finish messages to all cores
2902                         send_msg_1(i, GCFINISH, false);
2903                         gccorestatus[i] = 1;
2904                 }
2905 #ifdef RAWPATH // TODO GC_DEBUG
2906                 tprintf("gc finished \n");
2907                 //dumpSMem();
2908 #endif
2909         } else {
2910                 gcprocessing = true;
2911                 gc_collect(stackptr);
2912         }
2913
2914         // invalidate all shared mem pointers
2915         bamboo_cur_msp = NULL;
2916         bamboo_smem_size = 0;
2917
2918         gcflag = false;
2919         gcprocessing = false;
2920
2921 } // void gc(struct garbagelist * stackptr)
2922
2923 #endif