#ifdef MULTICORE_GC
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "multicoremem_helper.h"
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    //a core counts as running until it has returned its memory
    if (returnedmem[i])
      count++;
  }
  return count;
}
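/* A moveHelper tracks a window over the shared heap: base points to the
 * start of the current block, ptr to the current scan/copy position,
 * bound to the end of the current block, and (under GC_CACHE_ADAPT)
 * pagebound to the end of the current page.  orig walks the blocks being
 * evacuated; to walks the blocks being filled. */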
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the destination pointers
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the origin pointers at the same first local block
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
  orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
}
void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound = to->base + BLOCKSIZE(to->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
#endif
}
//This function is called on the master core only...and typically by
//the message interrupt handler

void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];
  blockrecord->status=BS_FREE;
  blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
  /* Update the lowest free block */
  if (blockindex < allocationinfo.lowestfreeblock) {
    allocationinfo.lowestfreeblock=blockindex;
  }

  /* This is our own block...means we should mark other blocks above us as free */
  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //this is true because this cannot be the lowest block
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}
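/* Worked example with hypothetical numbers: assume a uniform block size
 * of 64KB and that core 2 returns a heaptop 4096 bytes into its block,
 * whose global index is 5.  Then usedspace=4096, freespace=61440, block
 * 5 is marked BS_FREE, and any higher blocks assigned to core 2 become
 * fully free; block 5 also becomes lowestfreeblock if no lower block was
 * free. */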
void useReturnedMem(unsigned int corenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(corenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one block
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            if (i==STARTUPCORE) {
              gctomove = true;
              gcmovestartaddr = startaddr;
            } else if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
          }
        }
      }
    }
  }
}
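/* The allocator hands out space starting at the first cache line above
 * the live data: ((usedspace-1) & ~BAMBOO_CACHE_LINE_MASK)
 * + BAMBOO_CACHE_LINE_SIZE rounds a nonzero byte count up to the next
 * line boundary.  A minimal sketch of that idiom, assuming a
 * hypothetical 64-byte line (not compiled into the runtime): */
#if 0
#include <assert.h>
static unsigned long roundup_to_line(unsigned long used) {
  const unsigned long LINE = 64;        /* assumed cache-line size */
  const unsigned long MASK = LINE - 1;  /* stands in for BAMBOO_CACHE_LINE_MASK */
  return ((used - 1) & ~MASK) + LINE;
}
static void roundup_demo(void) {
  assert(roundup_to_line(1)  == 64);    /* partial line -> next boundary */
  assert(roundup_to_line(64) == 64);    /* exact multiple is unchanged */
  assert(roundup_to_line(65) == 128);
}
#endif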
void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    if (startaddr) {
      //the master satisfied our request in place
      gcmovestartaddr=startaddr;
    } else {
      //spin until the request can be satisfied
      while(!gctomove)
        ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound = gcbaseva + BOUNDPTR(globalblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=(void *)((int)((int)(to->ptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
#endif
}
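/* Sketch of the address-to-block mapping assumed above (illustrative
 * only; the real BLOCKINDEX/OFFSET2BASEVA/BOUNDPTR macros account for
 * the runtime's actual block layout): */
#if 0
static unsigned int block_of(char *addr, char *baseva, unsigned long blocksize) {
  /* global block index of the block containing addr */
  return (unsigned int)((addr - baseva) / blocksize);
}
static char * base_of(unsigned int block, char *baseva, unsigned long blocksize) {
  /* base virtual address of a global block */
  return baseva + (unsigned long)block * blocksize;
}
#endif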
void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if (to->localblocknum < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup core know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=COMPACTUNITS(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact...advance to the next local block
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
      orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
        break;
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}
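/* Compaction protocol, as used above: each worker compacts into its own
 * blocks; it sends GCRETURNMEM (once) as soon as the data left to move,
 * gccurr_heaptop, fits in the current destination block, so the master
 * can reclaim everything above that point; it asks for more space via
 * GCFINISHCOMPACT when its own blocks are full; and it signals
 * completion with GCFINISHCOMPACT(core, 0, 0).  The master answers
 * space requests with GCMOVESTART carrying the new destination
 * address. */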
void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
    for(int i=0;i<NUM_CORES2TEST;i++) {
      int neighborcore=core2test[corenum][i];
      if (neighborcore!=-1) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //found a block...mark it used and hand out the space above the live data
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}
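/* All of the search routines size requests the same way:
 * memcheck = max(requiredmem, min(desiredmem, MINMEMORYCHUNKSIZE)), i.e.
 * never hand out less than the hard requirement, and pad small requests
 * up to a useful chunk unless the requester cannot use that much.  A
 * minimal sketch with a hypothetical chunk size (not compiled in): */
#if 0
static unsigned long memcheck_of(unsigned long required, unsigned long desired) {
  const unsigned long MINCHUNK = 32768; /* stands in for MINMEMORYCHUNKSIZE */
  unsigned long threshold = (desired < MINCHUNK) ? desired : MINCHUNK;
  return (required > threshold) ? required : threshold;
}
#endif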
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //found a block...mark it used and hand out the space above the live data
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      //don't take a block from another core that hasn't returned its memory yet
      if (block->corenum!=core&&returnedmem[block->corenum])
        continue;

      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality
        //found a block...mark it used and hand out the space above the live data
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...we ran out of memory
  printf("Out of memory. Was trying for %u bytes\n", threshold);
  BAMBOO_EXIT();
}
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}
/* should be invoked with interrupts turned off */

void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }
  return NULL;
}
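/* Allocation policy above: prefer a free block on a neighboring core
 * (checkNeighbors_I) to preserve locality, fall back to scanning the
 * whole block table (globalSearch_I), and otherwise park the request in
 * gcrequiredmems/maxusefulmems until some core returns memory. */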
#ifdef GC_CACHE_ADAPT
/* To compute access rates per page, we need to compact to page
 * boundaries instead of block boundaries. */
unsigned int compactpages(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *topagebound=to->pagebound;
  void *origptr=orig->ptr;
  void *origpagebound=orig->pagebound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origpagebound-gcbaseva));
  unsigned int objlength;
  while((origptr<origpagebound)&&(toptr<topagebound)) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with a page...
          origptr=origpagebound;
          to->ptr=toptr;
          orig->ptr=origptr;
          orig->pagebound+=BAMBOO_PAGE_SIZE;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          // close the last destination page
          CACHEADAPT_COMPLETE_PAGE_CONVERT(origptr, toptr, toptr);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and the next comment should be removed
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        //object doesn't fit in this destination block
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        // close a destination page, update the revised profiling info
        CACHEADAPT_COMPLETE_PAGE_CONVERT(origptr, tobound, toptr);
        return length;
      }
      //good to move objects and update pointers
      //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);

      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  orig->pagebound=(void *)((int)(((int)origptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
  to->pagebound=(void *)((int)(((int)toptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  // close the last destination page
  CACHEADAPT_COMPLETE_PAGE_CONVERT(origptr, toptr, toptr);
  return 0;
}
#endif
/* This function is performance critical...spend more time optimizing it */

unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;
  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with the block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and the next comment should be removed
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        //object doesn't fit in this destination block
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
      }
      //good to move objects and update pointers
      //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);

      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  return 0;
}
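/* The hot loop above skips dead regions one gcmarktbl byte at a time
 * before falling back to per-object scanning.  A self-contained sketch
 * of that two-speed scan over a byte map (names hypothetical, not part
 * of the runtime): */
#if 0
#include <stddef.h>
static size_t skip_unmarked(const unsigned char *marktbl, size_t idx, size_t end) {
  /* fast path: one byte of marktbl summarizes a whole aligned granule */
  while (idx < end && !marktbl[idx])
    idx++;
  return idx; /* first marked granule, or end if none */
}
#endif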
void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
  struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
  initOrig_Dst(&orig, &to);

  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);
  compacthelper(&orig, &to);
}
void master_compact() {
  // predict the number of blocks to fill for each core
  numblockspercore = loadbalance()+1;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for the compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }

  // compact phase...the master core participates as well
  compact();

  /* wait for all cores to finish compacting */
  while(!gc_checkCoreStatus())
    ;

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  GC_PRINTF("compact phase finished \n");
}
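/* Worked example with hypothetical sizes: with NUMCORES4GC=4 and
 * loadbalance() predicting 2 blocks per core, numblockspercore=3 and
 * initblocks=12, so global blocks 0..11 start out BS_USED (each core
 * fills its own three), while blocks 12..GCNUMBLOCK-1 start out BS_FREE
 * with a full block of freespace; these form the pool that
 * checkNeighbors_I, globalSearch_I, and handleOneMemoryRequest draw
 * from. */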
#endif // MULTICORE_GC