#ifdef MULTICORE_GC
#include "structdefs.h"
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "multicoremem_helper.h"

int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if (returnedmem[i]) {
      count++;
    }
  }
  return count;
}
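
/* Set up the compaction iterators: both the source (orig) and the
   destination (to) start at this core's first local block. */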
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the dst ptr
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the orig ptr
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
  orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
}
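
/* Advance the destination iterator to this core's next local block. */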
void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
#endif
}

//This function is called on the master core only...and typically by
//the message interrupt handler
void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  unsigned INTPTR newusedspace=heaptop-OFFSET2BASEVA(blockindex)-gcbaseva;
  if((blockrecord->corenum!=cnum) && (newusedspace==0)) {
    // In this case, the cnum core just filled up a previous block and returned
    // the end address of that block which belongs to the following block. We
    // need to fix up the blockindex to the previous full block. As that block
    // has been marked as BS_USED already, nothing else is needed here.
    blockindex--;
    localblocknum=GLOBALBLOCK2LOCAL(blockindex);
    blockrecord=&allocationinfo.blocktable[blockindex];
  } else {
    blockrecord->status=BS_FREE;
    blockrecord->usedspace=newusedspace;
    blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
    /* Update the lowest free block */
    if (blockindex < allocationinfo.lowestfreeblock) {
      allocationinfo.lowestfreeblock=blockindex;
    }

    /* This is our own block...means we should mark other blocks above us as free */
    if (cnum==blockrecord->corenum) {
      unsigned INTPTR nextlocalblocknum=localblocknum+1;
      for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        nextblockrecord->status=BS_FREE;
        nextblockrecord->usedspace=0;
        //this is true because this cannot be the lowest block
        nextblockrecord->freespace=BLOCKSIZE(1);
      }
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}
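
/* Walk the returning core's blocks starting at localblockindex and try to
   satisfy pending memory requests with any block that has enough
   cache-line-aligned free space. */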
void useReturnedMem(unsigned int retcorenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(retcorenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one block
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            if (i==STARTUPCORE) {
              gctomove = true;
              gcmovestartaddr = startaddr;
            } else if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
          }
        }
      }
    }
  }
}
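
/* Client-mode wrapper for handleReturnMem_I: switches into runtime mode so
   the shared allocation table can be updated safely. */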
void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}
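
/* Obtain a destination block from the master: the master core calls the
   handler directly, other cores send GCFINISHCOMPACT and spin on gctomove
   until a GCMOVESTART reply supplies gcmovestartaddr. */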
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      while(!gctomove) ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound=gcbaseva+BOUNDPTR(globalblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=(void *)((((unsigned INTPTR)to->ptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
#endif
}
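
/* Pick the next destination block: stay local while this core still has
   blocks in its quota (gcblock2fill), otherwise ask the master. */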
void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if ((to->localblocknum+1) < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}
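
/* Per-core compaction loop: compact one unit at a time, advance the source
   iterator across this core's blocks, acquire destination space on demand,
   and tell the master once the current block is known to be the last one
   this core needs. */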
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop <= ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=COMPACTUNITS(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
      orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE) {
        CACHEADAPT_FINISH_COMPACT(to->ptr);
        break;
      }
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}
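
/* Look for a free block among the requesting core's neighbors, scanning
   from the lowest possibly-free block upward; returns the usable start
   address within the block, or NULL. */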
void * checkNeighbors_I(int ncorenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
    for(int i=0;i<NUM_CORES2TEST;i++) {
      int neighborcore=core2test[ncorenum][i];
      if (neighborcore!=-1) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block...mark it as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}
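
/* Linear scan of the global block table from the lowest free block up to
   topblock; also recomputes allocationinfo.lowestfreeblock as it goes. */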
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block...mark it as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}
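
/* Grant one core's pending request from the global block table, skipping
   free blocks whose owners are still running and may return more memory. */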
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      //don't take a block from another core that hasn't returned its memory yet
      if (block->corenum!=core&&returnedmem[block->corenum])
        continue;

      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality
        //we have a block...mark it as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if (core==STARTUPCORE) {
          gctomove=true;
          gcmovestartaddr=startaddr;
        } else if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  printf("Out of memory. Was trying for %u bytes\n", threshold);
  BAMBOO_EXIT();
}
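
/* Called once every still-running core is waiting on memory: service all
   pending requests, reusing the updated lowest-free-block hint. */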
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* should be invoked with interrupt turned off */
void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }
  return NULL;
}
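
/* GC_CACHE_ADAPT driver for compactblocks(): runs the compactor one page at
   a time so per-page cache advice can be finalized at each source and
   destination page boundary. */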
#ifdef GC_CACHE_ADAPT
unsigned int compactblockshelper(struct moveHelper * orig, struct moveHelper * to) {
  unsigned int minimumbytes=0;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  void * tmporig=orig->ptr;
  void * tmpto=to->ptr;

  while(true) {
    //call compactblocks using the page boundaries at the current bounds
    minimumbytes=compactblocks(orig, to);
    if(minimumbytes == 0) {
      //bump the orig page bound...
      //use old orig pointer to make sure we get correct block
      CACHEADAPT_FINISH_SRC_PAGE(tmporig, tmpto, to->ptr);
      if (orig->ptr<origbound) {
        tmporig=orig->ptr;
        tmpto=to->ptr;
        orig->pagebound=orig->pagebound+BAMBOO_PAGE_SIZE;
      } else {
        return 0;
      }
    } else {
      // require more memory
      void *endtoptr=to->ptr+minimumbytes;
      if (endtoptr>to->bound) {
        CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, 0);
        return minimumbytes;
      } else {
        CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, minimumbytes);
        to->pagebound=(void *)(((((unsigned INTPTR)endtoptr)-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
        //update pointers to avoid double counting the stuff we already added in
        tmporig=orig->ptr+minimumbytes;
        tmpto=to->ptr+minimumbytes;
      }
    }
  }
}
#endif
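
/* Core compactor for one source region: skips unmarked areas using the mark
   table, records each live object's destination in gcmappingtbl, and returns
   0 when the region is finished or the length of the object that did not fit
   when the destination region runs out of space. */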
/* This function is performance critical... spend more time optimizing it */
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *origptr=orig->ptr;
#ifdef GC_CACHE_ADAPT
  void *origbound=orig->pagebound;
  void *tobound=to->pagebound;
  //set to the first line so we don't need conditions
  void *lastflush=(void *)(((unsigned INTPTR)&gcmappingtbl[OBJMAPPINGINDEX(origptr)])&~(BAMBOO_CACHE_LINE_MASK));
#else
  void *origbound=orig->bound;
  void *tobound=to->bound;
#endif
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;

  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block (a page in the CACHE_ADAPT version)...
          to->ptr=toptr;
          orig->ptr=origbound;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
#ifdef GC_CACHE_ADAPT
          BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //code between this and next comment should be removed
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
      //end of code to remove

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=toptr;
        orig->ptr=origptr;
#ifdef GC_CACHE_ADAPT
        BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
        return length;
      }
      //good to move objects and update pointers

      void ** mapptr=&gcmappingtbl[OBJMAPPINGINDEX(origptr)];
      *mapptr=toptr;

#ifdef GC_CACHE_ADAPT
      void *maskmapptr=(void *)(((unsigned INTPTR)mapptr)&~(BAMBOO_CACHE_LINE_MASK));

      if (lastflush!=maskmapptr) {
        BAMBOO_CACHE_FLUSH_LINE(lastflush);
        lastflush=maskmapptr;
      }
#endif

      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origbound;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
#ifdef GC_CACHE_ADAPT
  BAMBOO_CACHE_FLUSH_LINE(lastflush);
#endif
  return 0;
}
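
/* Per-core entry point for the compact phase. */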
void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig;
  struct moveHelper to;
  initOrig_Dst(&orig, &to);

  compacthelper(&orig, &to);
#ifdef GC_CACHE_ADAPT
  //memory fence for the mapping-table updates (as elsewhere in the runtime)
  BAMBOO_CACHE_MF();
#endif
}
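
/* Master-side driver for the compact phase: balances block quotas, resets
   the block table, starts every core (including itself), then waits for all
   cores to finish and fixes up the allocator state. */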
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;
  numblockspercore = (numblockspercore>GCNUMLOCALBLOCK)?GCNUMLOCALBLOCK:numblockspercore;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for the compact phase
    gcrequiredmems[i] = 0;
    maxusefulmems[i] = 0;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM_MASTER();

  //the master core compacts its own heap as well
  compact();
  /* wait for all cores to finish compacting */
  GC_PRINTF("master finished\n");

  while(!gc_checkCoreStatus())
    ;

// GC_CACHE_COHERENT_ON should be true for gcmappingtbl, and the gcmappingtbl should be zeroed out before starting gc
#ifdef GC_DEBUG
  void *nextvalid=gcbaseva;
  for(void *tmp=gcbaseva; tmp<gcbaseva+BAMBOO_SHARED_MEM_SIZE;tmp+=ALIGNMENTSIZE) {
    unsigned int objlength=getMarkedLength(tmp);
    void *forwarding=gcmappingtbl[OBJMAPPINGINDEX(tmp)];
    if (tmp>=nextvalid&&((objlength!=0)!=(forwarding!=NULL))) {
      tprintf("Maps disagree tmp=%x olength=%u forwarding=%x\n",tmp, objlength, forwarding);
    }
    if (tmp<nextvalid&&forwarding!=NULL) {
      tprintf("Weird forwarding pointer\n");
    }
    if (tmp>=nextvalid&&(objlength!=0||forwarding!=NULL)) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);
      unsigned int size;
      unsigned int type;
      nextvalid=tmp+length;
      gettype_size(tmp, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      if (size!=length) {
        tprintf("Bad size in bitmap: tmp=%x length=%u size=%u type=%u\n", tmp, length, size, type);
      }
      block_t blockindex;
      BLOCKINDEX(blockindex, forwarding);
      struct blockrecord * block=&allocationinfo.blocktable[blockindex];
      void *blockptr=OFFSET2BASEVA(blockindex)+gcbaseva;

      if (block->status==BS_FREE) {
        if (forwarding>(blockptr+block->usedspace)) {
          tprintf("Pointer references free space forwarding=%x tmp=%x length=%u type=%u blockindex=%u, baseptr=%x, usedspace=%u, status=%u\n", forwarding, tmp, length, type,blockindex, blockptr, block->usedspace, block->status);
        }
      }
    }
  }
#endif

  GCPROFILE_ITEM_MASTER();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  // compute live object space
  GCPROFILE_RECORD_SPACE_MASTER();
  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC