Previous fix was still buggy; this one really fixes that bug
[IRC.git] / Robust / src / Runtime / bamboo / multicoregccompact.c
#ifdef MULTICORE_GC
#include "structdefs.h"
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "markbit.h"
#include "multicoremem_helper.h"
#include "gcqueue.h"

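// Count the cores that have not yet returned their unused memory
// (returnedmem[i] stays set until core i reports its heap top).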
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if(returnedmem[i]) {
      count++;
    }
  }
  return count;
}

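// Point both the compaction source (orig) and destination (to) helpers at
// this core's first local block; compaction slides live data down in place.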
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the dst ptr
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the orig ptr
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
  orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
}

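// Advance the destination helper to this core's next local block.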
void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
#endif
}

//This function is called on the master core only...and typically by
//the message interrupt handler

void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  unsigned INTPTR newusedspace=heaptop-OFFSET2BASEVA(blockindex)-gcbaseva;
  if((blockrecord->corenum!=cnum) && (newusedspace==0)) {
    // In this case, the cnum core just filled up a previous block and returned
    // the end address of that block which belongs to the following block. We
    // need to fix up the blockindex to the previous full block. As that block
    // has been marked as BS_USED already, nothing else is needed here.
    blockindex--;
    localblocknum=GLOBALBLOCK2LOCAL(blockindex);
    blockrecord=&allocationinfo.blocktable[blockindex];
  } else {
    blockrecord->status=BS_FREE;
    blockrecord->usedspace=newusedspace;
    blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
    /* Update the lowest free block */
    if (blockindex < allocationinfo.lowestfreeblock) {
      allocationinfo.lowestfreeblock=blockindex;
    }
  }

  /* This is our own block...means we should mark other blocks above us as free*/

  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //this is true because this cannot be the lowest block
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}

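// Try to satisfy pending memory requests using the blocks that core
// retcorenum just returned, starting at the given local block index.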
void useReturnedMem(unsigned int retcorenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(retcorenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one block
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            if (i==STARTUPCORE) {
              gctomove = true;
              gcmovestartaddr = startaddr;
            } else if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
            //this request is satisfied...stop scanning blocks for this core
            break;
          }
        }
      }
    }
  }
}

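// Non-interrupt wrapper: enter runtime mode around handleReturnMem_I.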
void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}

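// Ask the master core for a fresh block to compact into and spin until a
// start address arrives, then retarget the destination helper at it.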
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      while(!gctomove) ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound=gcbaseva+BOUNDPTR(globalblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=(void *)((((unsigned INTPTR)to->ptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
#endif
}

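// Pick the next destination block: take a local one if this core still has
// blocks left to fill, otherwise request one remotely.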
void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if ((to->localblocknum+1) < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}

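// Per-core compaction loop: once the remaining live data fits in the current
// destination block, report our final heap top to the master; keep compacting
// until the source pointer runs off the shared heap, then signal completion.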
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop <= ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=COMPACTUNITS(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact
      //increment the core
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
      orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE) {
        CACHEADAPT_FINISH_COMPACT(to->ptr);
        break;
      }
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}

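// Search the blocks of the cores adjacent to ncorenum (per core2test) for a
// free block large enough for the request, lowest block level first.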
void * checkNeighbors_I(int ncorenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
    for(int i=0;i<NUM_CORES2TEST;i++) {
      int neighborcore=core2test[ncorenum][i];
      if (neighborcore!=-1) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block
            //mark block as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}

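// Scan the block table from the lowest known free block for one with enough
// free space; updates allocationinfo.lowestfreeblock as a side effect.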
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}

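// Satisfy the pending request of the given core: find a free block, mark it
// used, and hand out its start address (gctomove for the master, a
// GCMOVESTART message otherwise). Exits if no block is large enough.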
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      //don't take a block from another core that hasn't returned its memory yet
      if (block->corenum!=core&&returnedmem[block->corenum])
        continue;

      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality

        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if (core==STARTUPCORE) {
          gctomove=true;
          gcmovestartaddr=startaddr;
        } else if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  printf("Out of memory.  Was trying for %u bytes\n", threshold);
  BAMBOO_EXIT();
}

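// Called once all running cores have stopped: service every outstanding
// memory request in core order.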
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* should be invoked with interrupt turned off */

void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }

  return NULL;
}

#ifdef GC_CACHE_ADAPT
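// Cache-adapt wrapper around compactblocks: walks the source block one page
// at a time so the cache sampling data can be finalized at each source and
// destination page boundary.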
unsigned int compactblockshelper(struct moveHelper * orig, struct moveHelper * to) {
  unsigned int minimumbytes=0;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  void * tmporig=orig->ptr;
  void * tmpto=to->ptr;

  while(true) {
    //call compactblocks using the page boundaries at the current bounds
    minimumbytes=compactblocks(orig, to);
    if(minimumbytes == 0) {
      //bump the orig page bound...
      //use old orig pointer to make sure we get correct block
      CACHEADAPT_FINISH_SRC_PAGE(tmporig, tmpto, to->ptr);
      if (orig->ptr<origbound) {
        tmporig=orig->ptr;
        tmpto=to->ptr;
        orig->pagebound=orig->pagebound+BAMBOO_PAGE_SIZE;
      } else {
        return 0;
      }
    } else {
      // require more memory
      void *endtoptr=to->ptr+minimumbytes;
      if (endtoptr>to->bound) {
        CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, 0);
        return minimumbytes;
      } else {
        CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, minimumbytes);
        to->pagebound=((((unsigned INTPTR)endtoptr)-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE;
        //update pointers to avoid double counting the stuff we already added in
        tmporig=orig->ptr+minimumbytes;
        tmpto=to->ptr+minimumbytes;
      }
    }
  }
}
#endif

/* This function is performance critical...  spend more time optimizing it */

unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *origptr=orig->ptr;
#ifdef GC_CACHE_ADAPT
  void *origbound=orig->pagebound;
  void *tobound=to->pagebound;
  //set to the first line so we don't need conditions
  void *lastflush=(void *)(((unsigned INTPTR)&gcmappingtbl[OBJMAPPINGINDEX(origptr)])&~(BAMBOO_CACHE_LINE_MASK));
#else
  void *origbound=orig->bound;
  void *tobound=to->bound;
#endif
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;

  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block (a page in the CACHE_ADAPT version)...
          to->ptr=toptr;
          orig->ptr=origbound;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
#ifdef GC_CACHE_ADAPT
          BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and next comment should be removed
#ifdef GC_DEBUG
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
#endif
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=toptr;
        orig->ptr=origptr;
#ifdef GC_CACHE_ADAPT
        BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
        return length;
      }
      //good to move objects and update pointers

      void ** mapptr=&gcmappingtbl[OBJMAPPINGINDEX(origptr)];
      *mapptr=toptr;

#ifdef GC_CACHE_ADAPT
      void *maskmapptr=(void *)(((unsigned INTPTR)mapptr)&~(BAMBOO_CACHE_LINE_MASK));

      if (lastflush!=maskmapptr) {
        BAMBOO_CACHE_FLUSH_LINE(lastflush);
        lastflush=maskmapptr;
      }
#endif

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
#ifdef GC_CACHE_ADAPT
  BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
  return 0;
}

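// Per-core entry point for the compaction phase.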
void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig;
  struct moveHelper to;
  initOrig_Dst(&orig, &to);

  compacthelper(&orig, &to);
#ifdef GC_CACHE_ADAPT
  BAMBOO_CACHE_MF();
#endif
}

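// Master-side driver: load-balance, reset the block table, start every core
// compacting, then wait for all of them to finish.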
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;
  numblockspercore = (numblockspercore>GCNUMLOCALBLOCK)?GCNUMLOCALBLOCK:numblockspercore;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM_MASTER();
  // compact phase
  compact();
  /* wait for all cores to finish compacting */
  GC_PRINTF("master finished\n");

  while(!gc_checkCoreStatus())
    ;

// GC_CACHE_COHERENT_ON should be true for gcmappingtbl, and the gcmappingtbl should be zeroed out before starting gc
#ifdef GC_DEBUG
  void *nextvalid=gcbaseva;
  for(void *tmp=gcbaseva; tmp<gcbaseva+BAMBOO_SHARED_MEM_SIZE;tmp+=ALIGNMENTSIZE) {
    unsigned int objlength=getMarkedLength(tmp);
    void *forwarding=gcmappingtbl[OBJMAPPINGINDEX(tmp)];
    if (tmp>=nextvalid&&((objlength!=0)!=(forwarding!=NULL))) {
      tprintf("Maps disagree tmp=%x olength=%u forwarding=%x\n",tmp, objlength, forwarding);
    }
    if (tmp<nextvalid&&forwarding!=NULL) {
      tprintf("Weird forwarding pointer\n");
    }
    if (tmp>=nextvalid&&(objlength!=0||forwarding!=NULL)) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);
      unsigned int size;
      unsigned int type;
      nextvalid=tmp+length;
      gettype_size(tmp, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      if (size!=length) {
        tprintf("Bad size in bitmap: tmp=%x length=%u size=%u type=%u\n", tmp, length, size, type);
      }
      block_t blockindex;
      BLOCKINDEX(blockindex, forwarding);
      struct blockrecord * block=&allocationinfo.blocktable[blockindex];
      void *blockptr=OFFSET2BASEVA(blockindex)+gcbaseva;

      if (block->status==BS_FREE) {
        if (forwarding>(blockptr+block->usedspace)) {
          tprintf("Pointer references free space forwarding=%x tmp=%x length=%u type=%u blockindex=%u, baseptr=%x, usedspace=%u, status=%u\n", forwarding, tmp, length, type,blockindex, blockptr, block->usedspace, block->status);
        }
      }
    }
  }
#endif

  GCPROFILE_ITEM_MASTER();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  // compute live object space
  GCPROFILE_RECORD_SPACE_MASTER();
  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC