2 #include "multicoregccompact.h"
3 #include "runtime_arch.h"
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
7 #include "multicoremem_helper.h"
10 int gc_countRunningCores() {
// Iterates over all NUMCORES4GC cores to produce a count.
// NOTE(review): the loop body, the accumulator it updates, and the return
// statement are elided from this excerpt.  Callers compare the result
// against gcmovepending, so this presumably counts cores still running the
// compaction phase -- confirm against the full source.
12 for(int i = 0; i < NUMCORES4GC; i++) {
20 void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
// Point the destination helper 'to' at this core's first local block.
22 to->localblocknum = 0;
23 BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
25 to->bound=to->base+BLOCKSIZE(to->localblocknum);
// The source helper 'orig' starts scanning at the same address compaction
// writes to: the base of this core's first local block.
28 orig->localblocknum = 0;
29 orig->ptr=orig->base = to->base;
30 orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
33 void getSpaceLocally(struct moveHelper *to) {
34 //we have space on our core...just keep going
// Re-derive base/bound for the destination's (next) local block.
// NOTE(review): the increment of to->localblocknum and the update of
// to->ptr appear to be elided from this excerpt.
36 BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
38 to->bound = to->base + BLOCKSIZE(to->localblocknum);
41 //This function is called on the master core only...and typically by
42 //the message interrupt handler
// Core 'cnum' reports it is done compacting and returns the unused heap
// above 'heaptop'.  The _I suffix follows the file's convention for routines
// that must run with interrupts off / in runtime mode (see the
// handleReturnMem wrapper).
44 void handleReturnMem_I(unsigned int cnum, void *heaptop) {
45 unsigned int blockindex;
46 BLOCKINDEX(blockindex, heaptop);
47 unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
48 //this core is done as far as memory usage is concerned
51 struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];
// Mark the block containing heaptop as free and record how much of it is
// actually occupied (heaptop measured from the block's base virtual addr).
53 blockrecord->status=BS_FREE;
54 blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
55 blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
56 /* Update the lowest free block */
57 if (blockindex < allocationinfo.lowestfreeblock) {
58 allocationinfo.lowestfreeblock=blockindex;
61 /* This is our own block...means we should mark other blocks above us as free*/
63 if (cnum==blockrecord->corenum) {
// Everything above the returned block in this core's quota is entirely
// free: zero used space, a full block of free space.
64 unsigned INTPTR nextlocalblocknum=localblocknum+1;
65 for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
66 unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
67 struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
68 nextblockrecord->status=BS_FREE;
69 nextblockrecord->usedspace=0;
70 //this is true because this cannot be the lowest block
71 nextblockrecord->freespace=BLOCKSIZE(1);
75 //this could be the last one....
76 int count=gc_countRunningCores();
77 if (gcmovepending==count) {
78 // All cores have stopped...hand out memory as necessary to handle all requests
79 handleMemoryRequests_I();
// NOTE(review): an 'else' appears to be elided before the next statement.
81 //see if returned memory blocks let us resolve requests
82 useReturnedMem(cnum, allocationinfo.lowestfreeblock);
// Try to satisfy pending memory requests (gcrequiredmems) using blocks that
// core 'corenum' has just returned, scanning that core's local blocks from
// 'localblockindex' upward.
86 void useReturnedMem(unsigned int corenum, block_t localblockindex) {
87 for(int i=0;i<NUMCORES4GC;i++) {
88 unsigned INTPTR requiredmem=gcrequiredmems[i];
// NOTE(review): a guard skipping cores with no pending request appears to
// be elided from this excerpt.
90 unsigned INTPTR desiredmem=maxusefulmems[i];
// Grant at least 'requiredmem', and at least MINMEMORYCHUNKSIZE unless the
// requester cannot use that much (desiredmem smaller).
91 unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
92 unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
95 for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
96 unsigned INTPTR blocknum=BLOCKINDEX2(corenum, nextlocalblocknum);
97 struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
98 if (nextblockrecord->status==BS_FREE) {
// Usable space rounded down to a whole cache line.
99 unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
100 if (freespace>=memcheck) {
101 nextblockrecord->status=BS_USED;
102 void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
// Round the used portion up to the next cache line so the grant starts
// cache-line aligned.
103 unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
104 //taken care of one block
106 void *startaddr=blockptr+usedspace;
// Tell core i where to continue compacting: the startup core just records
// the address locally; remote cores get a GCMOVESTART message (cached when
// the message layer is in send mode).
109 if (i==STARTUPCORE) {
111 gcmovestartaddr = startaddr;
112 } else if(BAMBOO_CHECK_SEND_MODE()) {
113 cache_msg_2_I(i,GCMOVESTART,startaddr);
// NOTE(review): the '} else {' for the direct-send path is elided.
115 send_msg_2_I(i,GCMOVESTART,startaddr);
// Mode-switching wrapper for handleReturnMem_I: enters runtime mode around
// the interrupt-unsafe worker, then returns to client mode.
124 void handleReturnMem(unsigned int cnum, void *heaptop) {
125 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
126 handleReturnMem_I(cnum, heaptop);
127 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Obtain a destination block from another core's quota via the master core,
// then repoint 'to' at the granted address.
130 void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
131 //need to get another block from elsewhere
132 //set flag to wait for memory
134 if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
// Master core asks the allocator directly (under runtime mode) instead of
// messaging itself.
136 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
137 void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
138 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// NOTE(review): handling for a deferred grant (startaddr not immediately
// available) appears to be elided before this assignment.
141 gcmovestartaddr=startaddr;
// NOTE(review): the non-master 'else' branch header is elided here.
147 //send request for memory
148 send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
149 //wait for flag to be set that we received message
155 to->ptr = gcmovestartaddr;
157 //set localblock number to high number to indicate this block isn't local
158 to->localblocknum = MAXBLOCK;
159 unsigned int globalblocknum;
160 BLOCKINDEX(globalblocknum, to->ptr);
// Re-derive block base/bound from the granted address's global block index.
161 to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
162 to->bound = gcbaseva + BOUNDPTR(globalblocknum);
// Acquire the next block to compact into: use a local block while still
// under this core's quota (gcblock2fill), otherwise request memory from the
// master core.
165 void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
166 //need more space to compact into
167 if (to->localblocknum < gcblock2fill) {
// NOTE(review): the local path (getSpaceLocally and the '} else {') is
// elided from this excerpt.
170 getSpaceRemotely(to, minimumbytes);
// Drive this core's compaction: repeatedly compact objects from 'orig' into
// 'to', fetching new source blocks and destination space as needed, then
// notify the master core on completion.
174 void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
175 bool senttopmessage=false;
// NOTE(review): the enclosing loop header is elided from this excerpt.
// Once the remaining data (gccurr_heaptop) fits in the current destination
// block, return the heap above the fill point to the master -- exactly once.
177 if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
178 //This block is the last for this core...let the startup know
179 if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
180 handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
// NOTE(review): '} else {' elided -- remote cores message the master.
182 send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
184 //Only send the message once
187 unsigned int minimumbytes=compactblocks(orig, to);
189 if (orig->ptr==orig->bound) {
190 //need more data to compact
// Advance the source helper to this core's next local block.
192 orig->localblocknum++;
193 BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
194 orig->ptr=orig->base;
195 orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
196 if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
199 if (minimumbytes!=0) {
// compactblocks ran out of destination room; get at least minimumbytes.
200 getSpace(to, minimumbytes);
// Compaction finished: report completion (zero-sized request) to the master.
203 if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
204 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
205 handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
206 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// NOTE(review): '} else {' elided.
208 send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
// Search the free blocks of this core's neighbors (core2test table) for one
// with enough space, walking block levels from the lowest-free hint upward.
// On success claims the block (BS_USED) and returns a cache-line-aligned
// start address inside it.
212 void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
213 int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
// Same grant-size policy as useReturnedMem: at least requiredmem, and at
// least MINMEMORYCHUNKSIZE unless the requester wants less overall.
214 unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
215 unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
217 for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
218 for(int i=0;i<NUM_CORES2TEST;i++) {
219 int neighborcore=core2test[corenum][i];
220 if (neighborcore!=-1) {
221 block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
222 struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
223 if (block->status==BS_FREE) {
// Usable space rounded down to a whole cache line.
224 unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
225 if (memcheck<=freespace) {
228 block->status=BS_USED;
229 void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
// Start the grant at the next cache line past the used portion.
230 unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
231 return blockptr+usedspace;
// NOTE(review): closing braces and the not-found return are elided.
// Linear scan of the block table from the lowest-free hint up to 'topblock'
// for a free block with enough cache-line-aligned space.  Tracks the first
// free block seen so allocationinfo.lowestfreeblock stays accurate whether
// or not the search succeeds.
240 void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
241 unsigned int firstfree=NOFREEBLOCK;
242 unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
243 unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
245 for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
246 struct blockrecord * block=&allocationinfo.blocktable[i];
247 if (block->status==BS_FREE) {
// Remember the first free block for the lowestfreeblock update below.
248 if(firstfree==NOFREEBLOCK)
// NOTE(review): the 'firstfree=i;' body of this if is elided.
250 unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
251 if (memcheck<=freespace) {
252 block->status=BS_USED;
// NOTE(review): line above reproduced as shown; claim block and return a
// cache-line-aligned address past its used portion.
255 void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
256 unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
257 allocationinfo.lowestfreeblock=firstfree;
258 return blockptr+usedspace;
262 allocationinfo.lowestfreeblock=firstfree;
// NOTE(review): the failure-path return is elided.
// Grant core 'core' its queued memory request from the global block table,
// scanning from 'lowestblock'.  Per the comment below, free blocks of cores
// that have not yet returned their memory are skipped.  On success the
// requester is sent a GCMOVESTART message with the start address.
266 void handleOneMemoryRequest(int core, unsigned int lowestblock) {
267 unsigned INTPTR requiredmem=gcrequiredmems[core];
268 unsigned INTPTR desiredmem=maxusefulmems[core];
269 block_t firstfree=NOFREEBLOCK;
// Same grant-size policy as the other allocators in this file.
270 unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
271 unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
273 for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
274 struct blockrecord * block=&allocationinfo.blocktable[searchblock];
275 if (block->status==BS_FREE) {
276 if(firstfree==NOFREEBLOCK)
277 firstfree=searchblock;
278 //don't take a block from another core that hasn't returned its memory yet
279 if (block->corenum!=core&&returnedmem[block->corenum])
// NOTE(review): the skip statement (presumably 'continue;') is elided.
282 unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
283 if (freespace>=memcheck) {
284 //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality
288 block->status=BS_USED;
289 void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
// Cache-line align the start of the grant past the used portion.
290 unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
291 allocationinfo.lowestfreeblock=firstfree;
292 //taken care of one block
294 void *startaddr=blockptr+usedspace;
// Queue or send the grant depending on the message layer's mode.
295 if(BAMBOO_CHECK_SEND_MODE()) {
296 cache_msg_2_I(core,GCMOVESTART,startaddr);
// NOTE(review): '} else {' elided before the direct send.
298 send_msg_2_I(core,GCMOVESTART,startaddr);
// Fell through the whole scan without finding a block:
304 //this is bad...ran out of memory
305 printf("Out of memory. Was trying for %u bytes\n", threshold);
// Master-core, interrupts-off: satisfy every core's pending memory request
// (gcrequiredmems[i] != 0) in one pass, refreshing the lowest-free hint
// after each grant.
309 void handleMemoryRequests_I() {
310 unsigned int lowestblock=allocationinfo.lowestfreeblock;
311 if (lowestblock==NOFREEBLOCK) {
// No hint available: start searching just past the per-core quota region.
312 lowestblock=numblockspercore*NUMCORES4GC;
315 for(int i=0;i < NUMCORES4GC; i++) {
316 if (gcrequiredmems[i]) {
317 handleOneMemoryRequest(i, lowestblock);
318 lowestblock=allocationinfo.lowestfreeblock;
323 /* should be invoked with interrupt turned off */
// Find spare memory for 'requiredcore': try its neighbors first (locality),
// then a global scan; if nothing is available, record the request so it can
// be satisfied later (handleReturnMem_I / handleMemoryRequests_I).
325 void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
326 if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
327 //There are spare blocks
328 unsigned int topblock=numblockspercore*NUMCORES4GC;
// NOTE(review): the declaration of 'memblock' is elided from this excerpt;
// the assignments inside the conditions below are intentional.
331 if (memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem)) {
// NOTE(review): 'return memblock;' elided.
333 } else if (memblock=globalSearch_I(topblock, requiredmem, desiredmem)) {
338 // If we cannot find spare mem right now, hold the request
339 gcrequiredmems[requiredcore] = requiredmem;
340 maxusefulmems[requiredcore]=desiredmem;
// If every running core is now waiting, resolve all pending requests.
343 int count=gc_countRunningCores();
344 if (gcmovepending==count) {
345 // All cores have stopped...hand out memory as necessary to handle all requests
346 handleMemoryRequests_I();
352 /* This function is performance critical... spend more time optimizing it */
// Compact marked objects in [orig->ptr, orig->bound) down to to->ptr,
// recording each object's forwarding address in gcmappingtbl.
// NOTE(review): the return statements are elided from this excerpt; from
// the caller (compacthelper) the contract appears to be: returns 0 on a
// clean finish, nonzero (bytes still needed) when the destination block ran
// out of room -- confirm against the full source.
354 unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
355 void *toptrinit=to->ptr;
356 void *toptr=toptrinit;
357 void *tobound=to->bound;
358 void *origptr=orig->ptr;
359 void *origbound=orig->bound;
360 unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
361 unsigned int objlength;
362 while(origptr<origbound) {
363 //Try to skip over stuff fast first
// Use the mark table to skip unmarked regions a table stride at a time.
364 unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
365 unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
366 if (!gcmarktbl[arrayoffset]) {
// NOTE(review): the do-loop header and arrayoffset increment are elided.
369 if (arrayoffset>=origendoffset) {
370 //finished with block...
// Charge the space consumed in the destination against the remaining total.
374 gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
377 } while(!gcmarktbl[arrayoffset]);
378 origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
381 //Scan more carefully next
382 objlength=getMarkedLength(origptr);
384 if (objlength!=NOTMARKED) {
385 unsigned int length=ALIGNSIZETOBYTES(objlength);
387 //code between this and next comment should be removed
// Debug-only cross-check: recompute the object's size from its type and
// report any mismatch with the mark-bitmap length.
391 gettype_size(origptr, &type, &size);
392 size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
395 tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
396 unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
397 unsigned INTPTR hibits=alignsize>>4;
398 unsigned INTPTR lobits=(alignsize&15)<<1;
399 tprintf("hibits=%x lobits=%x\n", hibits, lobits);
400 tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
403 //end of code to remove
405 void *endtoptr=toptr+length;
406 if (endtoptr>tobound) {
// Destination block overflow: roll back the accounting; elided code
// presumably reports how many bytes are still needed.
407 gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
412 //good to move objects and update pointers
413 //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);
// Record the forwarding address for this object before moving it.
415 gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
// NOTE(review): the object copy and toptr advance are elided.
420 origptr+=ALIGNMENTSIZE;
// Scan loop exited normally: finished the source block cleanly.
424 gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
// NOTE(review): the enclosing function header is elided from this excerpt;
// this reads as the per-core compact-phase entry point (likely 'compact()').
// It asserts we are in the compact phase, zero-initializes the source and
// destination moveHelpers, and runs the compaction driver.
429 BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);
431 // initialize structs for compacting
432 struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
433 struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
434 initOrig_Dst(&orig, &to);
436 CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);
437 compacthelper(&orig, &to);
// Master-core orchestration of the compact phase: size each core's block
// quota, initialize the global block table, start every core, wait for all
// of them to finish, then repair the lowest-free-block hint.
440 void master_compact() {
441 // predict number of blocks to fill for each core
442 numblockspercore = loadbalance()+1;
444 GC_PRINTF("mark phase finished \n");
446 gc_resetCoreStatus();
447 //initialize local data structures first....we don't want remote requests messing data up
448 unsigned int initblocks=numblockspercore*NUMCORES4GC;
449 allocationinfo.lowestfreeblock=NOFREEBLOCK;
// The first numblockspercore blocks of every core are reserved for that
// core's own compaction, so mark them used up front.
452 for(int i=0;i<initblocks;i++) {
453 allocationinfo.blocktable[i].status=BS_USED;
// Everything above the per-core quota starts out free and empty.
457 for(int i=initblocks;i<GCNUMBLOCK;i++) {
458 allocationinfo.blocktable[i].status=BS_FREE;
459 allocationinfo.blocktable[i].usedspace=0;
460 //this is true because all cores have at least one block already...
461 allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
464 //start all of the cores
465 for(int i = 0; i < NUMCORES4GC; i++) {
466 // init some data structures for compact phase
467 gcrequiredmems[i] = 0;
470 //send start compact messages to all cores
471 if(i != STARTUPCORE) {
472 send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
// NOTE(review): '} else {' elided -- the master sets its own quota locally.
474 gcblock2fill = numblockspercore;
// NOTE(review): the master's own entry into the compact phase appears to be
// elided before the wait loop below.
480 /* wait for all cores to finish compacting */
483 while(!gc_checkCoreStatus())
488 //just in case we didn't get blocks back...
489 if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
490 allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;
492 GC_PRINTF("compact phase finished \n");
495 #endif // MULTICORE_GC