remove debug printouts
[IRC.git] / Robust/src/Runtime/bamboo/multicoregccompact.c
#ifdef MULTICORE_GC
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "markbit.h"
#include "multicoremem_helper.h"

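// Count the cores that have not yet returned their memory to the
// master; returnedmem[i] is set for every core when the compact phase
// starts and cleared once core i reports its heap top back.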
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if(returnedmem[i]) {
      count++;
    }
  }
  return count;
}

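// Point both the source (orig) and destination (to) helpers at this
// core's first local block; compaction slides live objects toward the
// bottom of the core's own block chain.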
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the dst ptr
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the orig ptr
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
}

void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound = to->base + BLOCKSIZE(to->localblocknum);
}

//This function is called on the master core only...and typically by
//the message interrupt handler

void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  blockrecord->status=BS_FREE;
  blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
  /* Update the lowest free block */
  if (blockindex < allocationinfo.lowestfreeblock) {
    allocationinfo.lowestfreeblock=blockindex;
  }

  /* This is our own block...means we should mark other blocks above us as free*/
  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //this is true because this cannot be the lowest block
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}

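// Scan the pending memory requests and try to satisfy them from blocks
// freed by the returning core.  A candidate block must have at least
// max(requiredmem, min(desiredmem, MINMEMORYCHUNKSIZE)) bytes free;
// the same threshold computation recurs in checkNeighbors_I,
// globalSearch_I, and handleOneMemoryRequest below.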
void useReturnedMem(unsigned int corenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(corenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            //round the used space up to the next cache line boundary
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one block
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            if (i==STARTUPCORE) {
              gctomove = true;
              gcmovestartaddr = startaddr;
            } else if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
            //this request is satisfied...don't grant it a second block
            break;
          }
        }
      }
    }
  }
}

void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}

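// Ask the master for a fresh destination block.  The master core calls
// the handler directly; other cores send a GCFINISHCOMPACT request and
// spin until the GCMOVESTART reply sets gctomove and gcmovestartaddr.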
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      while(!gctomove)
        ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound = gcbaseva + BOUNDPTR(globalblocknum);
}

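// Prefer the next block in this core's own chain; only ask the master
// once the local quota (gcblock2fill) is exhausted.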
void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if (to->localblocknum < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}

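// Per-core compaction driver: feed successive local blocks through
// compactblocks, report this core's final heap top to the master as
// soon as the remaining live data fits in the current destination
// block, and request more destination space whenever an object does
// not fit.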
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=compactblocks(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact
      //advance to the next local block on this core
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
        break;
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}

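// Try to find a free block on the cores listed in core2test[corenum]
// (this core and its neighbors) so that borrowed memory stays close to
// the requester; entries of -1 are skipped.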
void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(int i=0;i<NUM_CORES2TEST;i++) {
    int neighborcore=core2test[corenum][i];
    if (neighborcore!=-1) {
      for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block
            //mark block as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}

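// Fallback linear search over every block below topblock; remembers
// the first free block seen so allocationinfo.lowestfreeblock can be
// updated before returning.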
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}

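// Grant one core's pending request from the first sufficiently large
// free block at or above lowestblock.  This runs only after every core
// has stopped, so failing to find a block is an unrecoverable
// out-of-memory condition.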
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality

        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  printf("Out of memory.  Was trying for %u bytes\n", memcheck);
  BAMBOO_EXIT();
}

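// Once every running core has returned its memory, satisfy all
// outstanding requests in core order, restarting each search from the
// most recently recorded lowest free block.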
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* Find a free block for requiredcore or record the request for later;
   should be invoked with interrupts turned off */

void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }

  return NULL;
}

/* This function is performance critical...  spend more time optimizing it */

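// Assign forwarding addresses for the marked objects in
// [orig->ptr, orig->bound), packing them upward from to->ptr and
// recording each new address in gcmappingtbl.  Returns 0 once the
// source block is fully processed, or the length of the first object
// that does not fit so the caller can request that much space.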
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;
  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and next comment should be removed
#ifdef GC_DEBUG
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
#endif
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
      }
      //good to move objects and update pointers
      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  return 0;
}


void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
  struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
  initOrig_Dst(&orig, &to);

  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);

  compacthelper(&orig, &to);
}

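// Master-side driver for the compact phase: balance the load, rebuild
// the block table, start every core compacting, then wait for them all
// to finish.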
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM();
  // compact phase
  compact();

  /* wait for all cores to finish compacting */
  while(!gc_checkCoreStatus())
    ;

  GCPROFILE_ITEM();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC