18e3bf8f954b0b5386cea62d68223dd965c7a5a7
[IRC.git] / Robust / src / Runtime / bamboo / multicoregccompact.c
1 #ifdef MULTICORE_GC
2 #include "multicoregccompact.h"
3 #include "runtime_arch.h"
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
6 #include "markbit.h"
7 #include "multicoremem_helper.h"
8 #include "gcqueue.h"
9
10 int gc_countRunningCores() {
11   int count=0;
12   for(int i = 0; i < NUMCORES4GC; i++) {
13     if(returnedmem[i]) {
14       count++;
15     }
16   }
17   return count;
18 }
19
20 void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
21   // init the dst ptr
22   to->localblocknum = 0;
23   BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
24   to->ptr = to->base;
25   to->bound=to->base+BLOCKSIZE(to->localblocknum);
26   
27   // init the orig ptr
28   orig->localblocknum = 0;
29   orig->ptr=orig->base = to->base;
30   orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
31 }
32
33 void getSpaceLocally(struct moveHelper *to) {
34   //we have space on our core...just keep going
35   to->localblocknum++;
36   BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
37   to->ptr=to->base;
38   to->bound = to->base + BLOCKSIZE(to->localblocknum);
39 }
40
//This function is called on the master core only...and typically by
//the message interrupt handler

// Core cnum reports that it needs no memory above heaptop.  Mark the
// block containing heaptop as free (recording its used/free split),
// free any higher blocks that belong to cnum itself, and then either
// hand out memory to all pending requests (if every still-running core
// now has a pending request) or try to satisfy requests from the
// space just returned.  Runs with interrupts off (_I suffix).
void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  blockrecord->status=BS_FREE;
  // used space = offset of heaptop within its block
  blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
  /* Update the lowest free block */
  if (blockindex < allocationinfo.lowestfreeblock) {
    allocationinfo.lowestfreeblock=blockindex;
  }

  /* This is our own block...means we should mark other blocks above us as free*/
  
  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //this is true because this cannot be the lowest block
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}
85
86 void useReturnedMem(unsigned int corenum, block_t localblockindex) {
87   for(int i=0;i<NUMCORES4GC;i++) {
88     unsigned INTPTR requiredmem=gcrequiredmems[i];
89     if (requiredmem) {
90       unsigned INTPTR desiredmem=maxusefulmems[i];
91       unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
92       unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
93
94
95       for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
96         unsigned INTPTR blocknum=BLOCKINDEX2(corenum, nextlocalblocknum);
97         struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
98         if (nextblockrecord->status==BS_FREE) {
99           unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
100           if (freespace>=memcheck) {
101             nextblockrecord->status=BS_USED;
102             void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
103             unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
104             //taken care of one block
105             gcmovepending--;
106             void *startaddr=blockptr+usedspace;
107             gcrequiredmems[i]=0;
108             maxusefulmems[i]=0;
109             if (i==STARTUPCORE) {
110               gctomove = true;
111               gcmovestartaddr = startaddr;
112             } else if(BAMBOO_CHECK_SEND_MODE()) {
113               cache_msg_2_I(i,GCMOVESTART,startaddr);
114             } else {
115               send_msg_2_I(i,GCMOVESTART,startaddr);
116             }
117           }
118         }
119       }
120     }
121   }
122 }
123
// Non-interrupt-context wrapper for handleReturnMem_I: switches into
// runtime mode around the call so the _I handler executes safely, then
// switches back to client mode.
void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}
129
// Obtain a new destination block from the master core after this core's
// local allotment is exhausted.  Blocks (spin-waits) until a start
// address is granted, then points 'to' at the granted block.
// minimumbytes: smallest usable grant (the object that did not fit).
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    // We ARE the master: invoke the handler directly instead of messaging.
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      // no block available yet; spin until the interrupt handler grants
      // one and sets gctomove/gcmovestartaddr
      while(!gctomove) ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound = gcbaseva + BOUNDPTR(globalblocknum);
}
164
165 void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
166   //need more space to compact into
167   if (to->localblocknum < gcblock2fill) {
168     getSpaceLocally(to);
169   } else {
170     getSpaceRemotely(to, minimumbytes);
171   }
172 }
173
// Drive compaction for this core: repeatedly compact source blocks into
// destination blocks until the source cursor walks off the end of the
// shared heap.  Notifies the master once when the remaining live data
// (gccurr_heaptop) fits in the current destination block, and reports
// completion to the master at the end.
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    // returns 0 when the source block is exhausted, or the size of the
    // object that did not fit in the destination block
    unsigned int minimumbytes=compactblocks(orig, to);

    if (orig->ptr==orig->bound) {
      //need more data to compact
      //increment the core
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
        break;
    }
    if (minimumbytes!=0) {
      // destination block filled up; get another one
      getSpace(to, minimumbytes);
    }
  }
  // tell the master this core has finished compacting
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}
211
// Search the free blocks of corenum's neighbor cores (core2test table)
// for one with enough space.  Returns a pointer just past the block's
// cache-line-aligned used region, or NULL if no neighbor block fits.
// Runs with interrupts off (_I suffix).
void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  // grant at least MINMEMORYCHUNKSIZE unless the core wants less
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
    for(int i=0;i<NUM_CORES2TEST;i++) {
      int neighborcore=core2test[corenum][i];
      if (neighborcore!=-1) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block
            //mark block as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            // round used space up to the next cache line boundary
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}
239
// Scan all blocks from the lowest known free block up to topblock for
// one with enough space.  Updates allocationinfo.lowestfreeblock to the
// first free block seen during the scan.  Returns a pointer just past
// the block's cache-line-aligned used region, or NULL if nothing fits.
// Runs with interrupts off (_I suffix).
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  // grant at least MINMEMORYCHUNKSIZE unless the core wants less
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        // round used space up to the next cache line boundary
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}
265
// Satisfy the pending memory request of one core by scanning all blocks
// from lowestblock upward.  Sends GCMOVESTART with the granted start
// address, or aborts the runtime if no block has enough space.
// Runs with interrupts off (called from handleMemoryRequests_I).
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  // grant at least MINMEMORYCHUNKSIZE unless the core wants less
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      //don't take a block from another core that hasn't returned its memory yet
      if (block->corenum!=core&&returnedmem[block->corenum])
        continue;
      
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality

        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        // round used space up to the next cache line boundary
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  // NOTE(review): %u assumes unsigned INTPTR is int-sized on this target
  // (true on 32-bit Tilera) — confirm if ported to a 64-bit platform
  printf("Out of memory.  Was trying for %u bytes\n", threshold);
  BAMBOO_EXIT();
}
308
309 void handleMemoryRequests_I() {
310   unsigned int lowestblock=allocationinfo.lowestfreeblock;
311   if (lowestblock==NOFREEBLOCK) {
312     lowestblock=numblockspercore*NUMCORES4GC;
313   }
314   
315   for(int i=0;i < NUMCORES4GC; i++) {
316     if (gcrequiredmems[i]) {
317       handleOneMemoryRequest(i, lowestblock);
318       lowestblock=allocationinfo.lowestfreeblock;
319     }
320   }
321 }
322
323 /* should be invoked with interrupt turned off */
324
325 void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
326   if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
327     //There are spare blocks
328     unsigned int topblock=numblockspercore*NUMCORES4GC;
329     void *memblock;
330     
331     if (memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem)) {
332       return memblock;
333     } else if (memblock=globalSearch_I(topblock, requiredmem, desiredmem)) {
334       return memblock;
335     }
336   }
337   
338   // If we cannot find spare mem right now, hold the request
339   gcrequiredmems[requiredcore] = requiredmem;
340   maxusefulmems[requiredcore]=desiredmem;
341   gcmovepending++;
342
343   int count=gc_countRunningCores();
344   if (gcmovepending==count) {
345     // All cores have stopped...hand out memory as necessary to handle all requests
346     handleMemoryRequests_I();
347   }
348
349   return NULL;
350
351
/* This function is performance critical...  spend more time optimizing it */

// Copy all marked (live) objects from the current source block into the
// destination block, recording each object's new address in
// gcmappingtbl.  Returns 0 when the source block is fully processed, or
// the aligned length of the first object that did not fit in the
// destination (the caller then fetches a new destination block).
// Updates to->ptr, orig->ptr, and the running live-data counter
// gccurr_heaptop.
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;
  while(origptr<origbound) {
    //Try to skip over stuff fast first
    // whole-word scan of the mark table to skip large dead regions
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
          // account for the live bytes moved into the destination
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and next comment should be removed
#ifdef GC_DEBUG
      // cross-check the bitmap-encoded length against the object header
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      
      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
#endif
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        // object doesn't fit; report its length so caller can get space
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
      }
      //good to move objects and update pointers
      //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);

      // record forwarding address; actual copy happens in a later phase
      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  return 0;
}
427
428 void compact() {
429   BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);
430   
431   // initialize structs for compacting
432   struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
433   struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
434   initOrig_Dst(&orig, &to);
435
436   CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);
437   compacthelper(&orig, &to);
438
439
// Master-core driver for the compaction phase: size each core's block
// quota via load balancing, initialize the global block table, start
// compaction on every core (including itself), and wait for all cores
// to finish.
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;
  
  GC_PRINTF("mark phase finished \n");
  
  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data strutures for compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM();
  // compact phase
  compact();
  /* wait for all cores to finish compacting */
  

  while(!gc_checkCoreStatus())
    ;

  GCPROFILE_ITEM();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  GC_PRINTF("compact phase finished \n");
}
494
495 #endif // MULTICORE_GC