Bug fix: during gc compact, one block's usage information could be modified by multip...
[IRC.git] / Robust / src / Runtime / bamboo / multicoregccompact.c
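The race described in the commit message appears to be handled by the usedspace check in handleReturnMem_I below: the master only accepts a returned-memory report for a block when it raises the recorded usedspace, so a stale or duplicate report can no longer shrink the block's usage information. A minimal standalone sketch of that guard, using simplified stand-in names (blockrec, report_return, BLOCKSZ) rather than the runtime's real types:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the runtime's per-block bookkeeping. */
struct blockrec {
  unsigned long usedspace;   /* bytes occupied at the bottom of the block */
  unsigned long freespace;   /* bytes still available */
};

#define BLOCKSZ 4096UL

/* Accept a report only if it grows usedspace; returns true if applied.
   This mirrors the check in handleReturnMem_I: a smaller (stale) value
   is ignored so a later report cannot clobber a newer one. */
static bool report_return(struct blockrec *b, unsigned long newused) {
  if (b->usedspace < newused) {
    b->usedspace = newused;
    b->freespace = BLOCKSZ - newused;
    return true;
  }
  return false;
}

int main(void) {
  struct blockrec b = {0, BLOCKSZ};
  report_return(&b, 512);   /* first report: applied */
  report_return(&b, 256);   /* stale report: ignored */
  printf("used=%lu free=%lu\n", b.usedspace, b.freespace);
  return 0;
}

Compiled on its own, the second (smaller) report is ignored and the printed values stay at used=512 free=3584.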
#ifdef MULTICORE_GC
#include "structdefs.h"
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "markbit.h"
#include "multicoremem_helper.h"
#include "gcqueue.h"

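/* Count the cores that have not yet returned their spare memory to the
   master: returnedmem[i] is set to 1 when the compact phase starts (see
   master_compact) and cleared in handleReturnMem_I once core i reports
   its final heap top. */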
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if(returnedmem[i]) {
      count++;
    }
  }
  return count;
}

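/* Point both the source (orig) and destination (to) move helpers at the
   first block owned by this core; orig is walked forward over the old
   data while to tracks where the surviving objects will be placed. */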
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the dst ptr
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the orig ptr
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
  orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
}

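/* Advance the destination helper to the next block owned by this core;
   no coordination with the master is needed for local blocks. */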
void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
#endif
}

//This function is called on the master core only...and typically by
//the message interrupt handler

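/* Record that core cnum has filled memory only up to heaptop in its
   current block, then either hand out memory for every blocked request
   (if every core still compacting is now waiting) or try to satisfy
   pending requests from the space that was just returned. */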
void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  unsigned INTPTR newusedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
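  /* Only accept a report that increases the recorded usedspace; per the
     commit message, a block's usage information could otherwise be
     overwritten when more than one core reports against the same block,
     so a stale or smaller value is ignored here. */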
  if(blockrecord->usedspace < newusedspace) {
    blockrecord->status=BS_FREE;
    blockrecord->usedspace=newusedspace;
    blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
    /* Update the lowest free block */
    if (blockindex < allocationinfo.lowestfreeblock) {
      allocationinfo.lowestfreeblock=blockindex;
    }

    /* This is our own block...means we should mark other blocks above us as free*/

    if (cnum==blockrecord->corenum) {
      unsigned INTPTR nextlocalblocknum=localblocknum+1;
      for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        nextblockrecord->status=BS_FREE;
        nextblockrecord->usedspace=0;
        //this is true because this cannot be the lowest block
        nextblockrecord->freespace=BLOCKSIZE(1);
      }
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}

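/* Scan the blocks just returned by core retcorenum (starting at
   localblockindex) and use any free space found there to satisfy cores
   that are still waiting for a destination block, sending each one a
   GCMOVESTART with its start address. */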
void useReturnedMem(unsigned int retcorenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;


      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(retcorenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one block
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            if (i==STARTUPCORE) {
              gctomove = true;
              gcmovestartaddr = startaddr;
            } else if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
          }
        }
      }
    }
  }
}

void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}

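/* The current core has no more local blocks to fill: obtain a new
   destination block from the master (directly if this is the master,
   via a GCFINISHCOMPACT request otherwise) and spin on gctomove until
   the start address arrives, then retarget the to helper at it. */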
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      while(!gctomove) ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound=gcbaseva+BOUNDPTR(globalblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=(void *)((int)((int)(to->ptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
#endif
}

void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if ((to->localblocknum+1) < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}

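/* Per-core compaction driver: repeatedly compact units from orig into
   to, report this core's final heap top to the master once the remaining
   live data fits in the current destination block, and request more
   space whenever the destination fills up.  Finishes by notifying the
   master with a GCFINISHCOMPACT of zero bytes. */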
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop <= ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=COMPACTUNITS(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact
      //increment the core
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
      orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE) {
        CACHEADAPT_FINISH_COMPACT(to->ptr);
        break;
      }
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}

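/* Search the cores listed in core2test[ncorenum] for a free block with
   enough space, marking the first match as used.  Returns a pointer just
   past the block's cache-line-aligned used prefix, or NULL if none of
   the candidate cores can satisfy the request. */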
void * checkNeighbors_I(int ncorenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
    for(int i=0;i<NUM_CORES2TEST;i++) {
      int neighborcore=core2test[ncorenum][i];
      if (neighborcore!=-1) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block
            //mark block as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}

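/* Fallback search over every block from the lowest known free block up
   to topblock; refreshes allocationinfo.lowestfreeblock with the first
   free block it encounters. */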
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}

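/* Satisfy one core's pending request from any free block at or above
   lowestblock, skipping blocks whose owning core has not yet returned
   its memory, and send the chosen start address via GCMOVESTART.  If no
   block is large enough the runtime exits. */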
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      //don't take a block from another core that hasn't returned its memory yet
      if (block->corenum!=core&&returnedmem[block->corenum])
        continue;

      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality

        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if (core==STARTUPCORE) {
          gctomove=true;
          gcmovestartaddr=startaddr;
        } else if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  printf("Out of memory.  Was trying for %u bytes\n", threshold);
  BAMBOO_EXIT();
}

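/* Invoked when the number of pending requests equals the number of cores
   that are still compacting, i.e. every remaining core is blocked; hands
   out memory for all pending requests in core order. */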
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* should be invoked with interrupt turned off */

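/* Try the requesting core's neighbors first, then fall back to a global
   search; if nothing is free yet, record the request in gcrequiredmems /
   maxusefulmems and let handleMemoryRequests_I resolve it once the last
   running core blocks. */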
void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if (memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem)) {
      return memblock;
    } else if (memblock=globalSearch_I(topblock, requiredmem, desiredmem)) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }

  return NULL;
}

#ifdef GC_CACHE_ADAPT
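/* GC_CACHE_ADAPT variant of the per-unit compaction: drives compactblocks
   page by page so the CACHEADAPT_FINISH_SRC_PAGE / _DST_PAGE hooks run at
   each page boundary, bumping the page bounds as it goes. */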
unsigned int compactblockshelper(struct moveHelper * orig, struct moveHelper * to) {
  unsigned int minimumbytes=0;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  void * tmporig=orig->ptr;
  void * tmpto=to->ptr;

  while(true) {
    //call compactblocks using the page boundaries at the current bounds
    minimumbytes=compactblocks(orig, to);
    if(minimumbytes == 0) {
      //bump the orig page bound...
      //use old orig pointer to make sure we get correct block
      CACHEADAPT_FINISH_SRC_PAGE(tmporig, tmpto, to->ptr);
      if (orig->ptr<origbound) {
        tmporig=orig->ptr;
        tmpto=to->ptr;
        orig->pagebound=orig->pagebound+BAMBOO_PAGE_SIZE;
      } else {
        return 0;
      }
    } else {
      // require more memory
      void *endtoptr=to->ptr+minimumbytes;
      if (endtoptr>to->bound) {
        CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, 0);
        return minimumbytes;
      } else {
        CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, minimumbytes);
        to->pagebound=((((unsigned INTPTR)endtoptr)-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE;
        //update pointers to avoid double counting the stuff we already added in
        tmporig=orig->ptr+minimumbytes;
        tmpto=to->ptr+minimumbytes;
      }
    }
  }
}
#endif

/* This function is performance critical...  spend more time optimizing it */

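/* Walk the marked objects between orig->ptr and its bound and assign
   each one its post-compaction address (recorded in gcmappingtbl);
   unmarked regions are skipped one mark-table entry at a time, and the
   touched mapping-table cache lines are flushed in the cache-adapt
   build.  Returns 0 when the source region is exhausted, or the size of
   the first object that no longer fits in the destination. */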
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *origptr=orig->ptr;
#ifdef GC_CACHE_ADAPT
  void *origbound=orig->pagebound;
  void *tobound=to->pagebound;
  //set to the first line so we don't need conditions
  void *lastflush=(void *)(((unsigned INTPTR)&gcmappingtbl[OBJMAPPINGINDEX(origptr)])&~(BAMBOO_CACHE_LINE_MASK));
#else
  void *origbound=orig->bound;
  void *tobound=to->bound;
#endif
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;

  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block(a page in CACHE_ADAPT version)...
          to->ptr=toptr;
          orig->ptr=origbound;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
#ifdef GC_CACHE_ADAPT
          BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and next comment should be removed
#ifdef GC_DEBUG
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
#endif
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=toptr;
        orig->ptr=origptr;
#ifdef GC_CACHE_ADAPT
        BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
        return length;
      }
      //good to move objects and update pointers

      void ** mapptr=&gcmappingtbl[OBJMAPPINGINDEX(origptr)];
      *mapptr=toptr;

#ifdef GC_CACHE_ADAPT
      void *maskmapptr=(void *)(((unsigned INTPTR)mapptr)&~(BAMBOO_CACHE_LINE_MASK));

      if (lastflush!=maskmapptr) {
        BAMBOO_CACHE_FLUSH_LINE(lastflush);
        lastflush=maskmapptr;
      }
#endif

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
#ifdef GC_CACHE_ADAPT
  BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
  return 0;
}

void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig;
  struct moveHelper to;
  initOrig_Dst(&orig, &to);

  compacthelper(&orig, &to);
#ifdef GC_CACHE_ADAPT
  BAMBOO_CACHE_MF();
#endif
}

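/* Master-side driver for the compact phase: balance blocks across cores,
   reset the block table (the first numblockspercore*NUMCORES4GC blocks
   start as used, the rest as free), start compaction on every core,
   compact locally, and wait for all cores to finish before recording
   profile data. */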
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;
  numblockspercore = (numblockspercore>GCNUMLOCALBLOCK)?GCNUMLOCALBLOCK:numblockspercore;


  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM_MASTER();
  // compact phase
  compact();
  /* wait for all cores to finish compacting */
  GC_PRINTF("master finished\n");

  while(!gc_checkCoreStatus())
    ;

// GC_CACHE_COHERENT_ON should be true for gcmappingtbl, and the gcmappingtbl should be zeroed out before starting gc
#ifdef GC_DEBUG
  void *nextvalid=gcbaseva;
  for(void *tmp=gcbaseva; tmp<gcbaseva+BAMBOO_SHARED_MEM_SIZE;tmp+=ALIGNMENTSIZE) {
    unsigned int objlength=getMarkedLength(tmp);
    void *forwarding=gcmappingtbl[OBJMAPPINGINDEX(tmp)];
    if (tmp>=nextvalid&&((objlength!=0)!=(forwarding!=NULL))) {
      tprintf("Maps disagree tmp=%x olength=%u forwarding=%x\n",tmp, objlength, forwarding);
    }
    if (tmp<nextvalid&&forwarding!=NULL) {
      tprintf("Weird forwarding pointer\n");
    }
    if (tmp>=nextvalid&&(objlength!=0||forwarding!=NULL)) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);
      unsigned int size;
      unsigned int type;
      nextvalid=tmp+length;
      gettype_size(tmp, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      if (size!=length) {
        tprintf("Bad size in bitmap: tmp=%x length=%u size=%u type=%u\n", tmp, length, size, type);
      }
      block_t blockindex;
      BLOCKINDEX(blockindex, forwarding);
      struct blockrecord * block=&allocationinfo.blocktable[blockindex];
      void *blockptr=OFFSET2BASEVA(blockindex)+gcbaseva;

      if (block->status==BS_FREE) {
        if (forwarding>(blockptr+block->usedspace)) {
          tprintf("Pointer references free space forwarding=%x tmp=%x length=%u type=%u blockindex=%u, baseptr=%x, usedspace=%u, status=%u\n", forwarding, tmp, length, type,blockindex, blockptr, block->usedspace, block->status);
        }
      }
    }
  }
#endif

  GCPROFILE_ITEM_MASTER();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  // compute live object space
  GCPROFILE_RECORD_SPACE_MASTER();
  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC