#ifdef MULTICORE_GC
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "markbit.h"
#include "multicoremem_helper.h"

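// Count the cores that have not yet returned spare memory to the master
// (returnedmem[i] is set in master_compact and cleared in
// handleReturnMem_I); such cores are still compacting.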
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if(returnedmem[i]) {
      count++;
    }
  }
  return count;
}

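// Point both the compaction source (orig) and destination (to) cursors
// at local block 0 of this core. Sharing one starting base is safe
// because compaction only slides objects toward lower addresses.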
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the dst ptr
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the orig ptr
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
}

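// Advance the destination cursor to this core's next local block.
// (BASEPTR and BLOCKSIZE are block-layout macros defined elsewhere in
// the runtime; BASEPTR yields the base address of a core's n-th local
// block and BLOCKSIZE that block's size.)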
void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound = to->base + BLOCKSIZE(to->localblocknum);
}

//This function is called on the master core only...and typically by
//the message interrupt handler

void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);

  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  blockrecord->status=BS_FREE;
  blockrecord->usedspace=(unsigned INTPTR)(heaptop-gcbaseva-OFFSET2BASEVA(blockindex));
  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
  /* Update the lowest free block */
  if (blockindex < allocationinfo.lowestfreeblock) {
    allocationinfo.lowestfreeblock=blockindex;
  }

  /* This is our own block...means we should mark other blocks above us as free*/
  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //all blocks beyond a core's first block have size BLOCKSIZE(1)
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }
}

void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}

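// Out of local blocks: ask the master for space anywhere in the shared
// heap. On the master we call handlegcfinishcompact_I directly; on any
// other core we send GCFINISHCOMPACT and spin until the message handler
// (elsewhere in the runtime) delivers the master's GCMOVESTART reply by
// setting gcmovestartaddr and gctomove.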
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    printf("A: %d\n", BAMBOO_NUM_OF_CORE);

    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      while(!gctomove) ;
    }
    printf("B: %d\n", BAMBOO_NUM_OF_CORE);
  } else {
    printf("C: %d\n", BAMBOO_NUM_OF_CORE);
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove) ;
    printf("D: %d\n", BAMBOO_NUM_OF_CORE);
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound = gcbaseva + BOUNDPTR(globalblocknum);
}

void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if (to->localblocknum < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}

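// Per-core compaction driver: compact every source block on this core
// into destination blocks, grabbing fresh destination space as needed.
// Once the remaining live data (gccurr_heaptop) fits in the current
// destination block, this core's final heap top is reported to the
// master exactly once so the tail of its space can be granted to others.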
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup core know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=compactblocks(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact
      //increment the block
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
        break;
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}

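// Look for a free block on up to NUM_CORES2TEST neighboring cores (per
// the core2test proximity table) before falling back to a global search,
// so granted memory stays close to the requesting core. Returns a
// pointer just past the chosen block's cache-line-aligned used prefix,
// or NULL if no neighbor has a large enough block.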
void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(int i=0;i<NUM_CORES2TEST;i++) {
    int neighborcore=core2test[corenum][i];
    if (neighborcore!=-1) {
      for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block
            //mark block as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}

void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}

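// Service one core's pending request: scan upward from the lowest known
// free block for one whose aligned free space satisfies the request,
// mark it used, and send the requester a GCMOVESTART message carrying
// the address it may start filling. Exiting when nothing fits is
// deliberate: every other core is blocked here, so memory is exhausted.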
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality

        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one request...clear it so it cannot be serviced twice
        gcrequiredmems[core]=0;
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  BAMBOO_EXIT();
}

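// Called with interrupts off once every still-running core is blocked
// on a memory request; grants blocks to all pending requesters in core
// order.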
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* should be invoked with interrupt turned off */

void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))!=NULL) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))!=NULL) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }

  return NULL;
}

/* This function is performance critical... spend more time optimizing it */

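// compactblocks slides the marked objects of the current source block
// (orig) down into the destination region (to), recording each object's
// new address in gcmappingtbl. It first skips runs of unmarked space one
// mark-table entry at a time, then examines objects individually.
// Returns 0 once the source block is exhausted, or the byte length of
// the object that did not fit when the destination block fills up (the
// caller must then supply a new destination via getSpace).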
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;

  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);
      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
      }
      //good to move objects and update pointers
      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }

  //walked off the end of the source block
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  return 0;
}

void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
  struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
  initOrig_Dst(&orig, &to);

  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);

  compacthelper(&orig, &to);
}

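// Master-side driver for the compact phase: pick a per-core block budget
// with loadbalance(), rebuild the allocation table (the first
// numblockspercore blocks of every core start out used, the rest free),
// broadcast GCSTARTCOMPACT, take part in compaction ourselves, and wait
// for every core to report in.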
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first...we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for the compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM();
  // compact phase
  compact();
  /* wait for all cores to finish compacting */
  tprintf("MASTER WAITING\n");

  while(!gc_checkCoreStatus())
    ;

  tprintf("POST_WAIT\n");
  GCPROFILE_ITEM();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC