remove debug printouts
[IRC.git] / Robust / src / Runtime / bamboo / multicoregccompact.c
index 62021194a8a897b7cf192f89c23cc8f19e7f70c9..c581122962ff684db78d2dfc971153d3ba2ef224 100644 (file)
@@ -4,20 +4,16 @@
 #include "multicoreruntime.h"
 #include "multicoregarbage.h"
 #include "markbit.h"
+#include "multicoremem_helper.h"
 
-bool gc_checkCoreStatus() {
-  for(int i = 0; i < NUMCORES4GC; ++i) {
-    if(gccorestatus[i] != 0) {
-      return false;
+int gc_countRunningCores() {
+  int count=0;
+  for(int i = 0; i < NUMCORES4GC; i++) {
+    if(returnedmem[i]) {
+      count++;
     }
-  }  
-  return true;
-}
-
-void gc_resetCoreStatus() {
-  for(int i = 0; i < NUMCORES4GC; ++i) {
-    gccorestatus[i] = 1;
   }
+  return count;
 }
 
 void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
@@ -41,14 +37,117 @@ void getSpaceLocally(struct moveHelper *to) {
   to->bound = to->base + BLOCKSIZE(to->localblocknum);
 }
 
+//This function is called on the master core only...and typically by
+//the message interrupt handler
+
+void handleReturnMem_I(unsigned int cnum, void *heaptop) {
+  unsigned int blockindex;
+  BLOCKINDEX(blockindex, heaptop);
+  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
+  //this core is done as far as memory usage is concerned
+  returnedmem[cnum]=0;
+
+  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];
+
+  blockrecord->status=BS_FREE;
+  blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
+  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
+  /* Update the lowest free block */
+  if (blockindex < allocationinfo.lowestfreeblock) {
+    allocationinfo.lowestfreeblock=blockindex;
+  }
+
+  /* This is our own block...means we should mark other blocks above us as free*/
+  if (cnum==blockrecord->corenum) {
+    unsigned INTPTR nextlocalblocknum=localblocknum+1;
+    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
+      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
+      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
+      nextblockrecord->status=BS_FREE;
+      nextblockrecord->usedspace=0;
+      //this is true because this cannot be the lowest block
+      nextblockrecord->freespace=BLOCKSIZE(1);
+    }
+  }
+
+  //this could be the last one....
+  int count=gc_countRunningCores();
+  if (gcmovepending==count) {
+    // All cores have stopped...hand out memory as necessary to handle all requests
+    handleMemoryRequests_I();
+  } else {
+    //see if returned memory blocks let us resolve requests
+    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
+  }
+}
+
+void useReturnedMem(unsigned int corenum, block_t localblockindex) {
+  for(int i=0;i<NUMCORES4GC;i++) {
+    unsigned INTPTR requiredmem=gcrequiredmems[i];
+    if (requiredmem) {
+      unsigned INTPTR desiredmem=maxusefulmems[i];
+      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
+
+
+      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
+       unsigned INTPTR blocknum=BLOCKINDEX2(corenum, nextlocalblocknum);
+       struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
+       if (nextblockrecord->status==BS_FREE) {
+         unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
+         if (freespace>=memcheck) {
+           nextblockrecord->status=BS_USED;
+           void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
+           unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+           //taken care of one block
+           gcmovepending--;
+           void *startaddr=blockptr+usedspace;
+           gcrequiredmems[i]=0;
+           maxusefulmems[i]=0;
+           if (i==STARTUPCORE) {
+             gctomove = true;
+             gcmovestartaddr = startaddr;
+           } else if(BAMBOO_CHECK_SEND_MODE()) {
+             cache_msg_2_I(i,GCMOVESTART,startaddr);
+           } else {
+             send_msg_2_I(i,GCMOVESTART,startaddr);
+           }
+         }
+       }
+      }
+    }
+  }
+}
+
+void handleReturnMem(unsigned int cnum, void *heaptop) {
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+  handleReturnMem_I(cnum, heaptop);
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+}
+
 void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
   //need to get another block from elsewhere
   //set flag to wait for memory
-  gctomove=false;
-  //send request for memory
-  send_msg_3(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes);
-  //wait for flag to be set that we received message
-  while(!gctomove) ;
+
+  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
+    gctomove=false;
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+
+    if (startaddr) {
+      gcmovestartaddr=startaddr;
+    } else {
+      while(!gctomove) ;
+    }
+  } else {
+    gctomove=false;
+    //send request for memory
+    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
+    //wait for flag to be set that we received message
+    while(!gctomove)
+      ;
+  }
 
   //store pointer
   to->ptr = gcmovestartaddr;
@@ -73,13 +172,16 @@ void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
 void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
   bool senttopmessage=false;
   while(true) {
-    if ((gcheaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
+    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
       //This block is the last for this core...let the startup know
-      send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gcheaptop);
+      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
+       handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
+      } else {
+       send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
+      }
       //Only send the message once
       senttopmessage=true;
     }
-
     unsigned int minimumbytes=compactblocks(orig, to);
     if (orig->ptr==orig->bound) {
       //need more data to compact
@@ -95,54 +197,161 @@ void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
       getSpace(to, minimumbytes);
     }
   }
-  
-  send_msg_3(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0);
+  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  } else {
+    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
+  }
 }
 
-/* Should be invoked with interrupt turned off. */
+void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
+  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
+  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
+
+  for(int i=0;i<NUM_CORES2TEST;i++) {
+    int neighborcore=core2test[corenum][i];
+    if (neighborcore!=-1) {
+      for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
+       block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
+       struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
+       if (block->status==BS_FREE) {
+         unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
+         if (memcheck<=freespace) {
+           //we have a block
+           //mark block as used
+           block->status=BS_USED;
+           void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
+           unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+           return blockptr+usedspace;
+         }
+       }
+      }
+    }
+  }
+  return NULL;
+}
 
-void * assignSpareMem_I(unsigned int sourcecore, unsigned int requiredmem) {
+void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
+  unsigned int firstfree=NOFREEBLOCK;
+  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
+
+  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
+    struct blockrecord * block=&allocationinfo.blocktable[i];
+    if (block->status==BS_FREE) {
+      if(firstfree==NOFREEBLOCK)
+       firstfree=i;
+      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
+      if (memcheck<=freespace) {
+       //we have a block
+       //mark block as used
+       block->status=BS_USED;
+       void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
+       unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+       allocationinfo.lowestfreeblock=firstfree;
+       return blockptr+usedspace;
+      }
+    }
+  }
+  allocationinfo.lowestfreeblock=firstfree;
   return NULL;
 }
 
-void * assignSpareMem(unsigned int sourcecore,unsigned int requiredmem) {
-  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-  void * retval=assignSpareMem_I(sourcecore, requiredmem);
-  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  return retval;
+void handleOneMemoryRequest(int core, unsigned int lowestblock) {
+  unsigned INTPTR requiredmem=gcrequiredmems[core];
+  unsigned INTPTR desiredmem=maxusefulmems[core];
+  block_t firstfree=NOFREEBLOCK;
+  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
+
+  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
+    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
+    if (block->status==BS_FREE) {
+      if(firstfree==NOFREEBLOCK)
+       firstfree=searchblock;
+      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
+      if (freespace>=memcheck) {
+       //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality
+
+       //we have a block
+       //mark block as used
+       block->status=BS_USED;
+       void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
+       unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+       allocationinfo.lowestfreeblock=firstfree;
+       //taken care of one block
+       gcmovepending--;
+       void *startaddr=blockptr+usedspace;
+       if(BAMBOO_CHECK_SEND_MODE()) {
+         cache_msg_2_I(core,GCMOVESTART,startaddr);
+       } else {
+         send_msg_2_I(core,GCMOVESTART,startaddr);
+       }
+       return;
+      }
+    }
+  }
+  //this is bad...ran out of memory
+  printf("Out of memory.  Was trying for %u bytes\n", threshold);
+  BAMBOO_EXIT();
+}
+
+void handleMemoryRequests_I() {
+  unsigned int lowestblock=allocationinfo.lowestfreeblock;
+  if (lowestblock==NOFREEBLOCK) {
+    lowestblock=numblockspercore*NUMCORES4GC;
+  }
+  
+  for(int i=0;i < NUMCORES4GC; i++) {
+    if (gcrequiredmems[i]) {
+      handleOneMemoryRequest(i, lowestblock);
+      lowestblock=allocationinfo.lowestfreeblock;
+    }
+  }
 }
 
 /* should be invoked with interrupt turned off */
 
-void * gcfindSpareMem_I(unsigned int requiredmem,unsigned int requiredcore) {
-  void * startaddr;
-  for(int k = 0; k < NUMCORES4GC; k++) {
+void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
+  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
+    //There are spare blocks
+    unsigned int topblock=numblockspercore*NUMCORES4GC;
+    void *memblock;
     
+    if (memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem)) {
+      return memblock;
+    } else if (memblock=globalSearch_I(topblock, requiredmem, desiredmem)) {
+      return memblock;
+    }
   }
+  
   // If we cannot find spare mem right now, hold the request
   gcrequiredmems[requiredcore] = requiredmem;
+  maxusefulmems[requiredcore]=desiredmem;
   gcmovepending++;
+
+  int count=gc_countRunningCores();
+  if (gcmovepending==count) {
+    // All cores have stopped...hand out memory as necessary to handle all requests
+    handleMemoryRequests_I();
+  }
+
   return NULL;
 } 
 
-bool gcfindSpareMem(unsigned int requiredmem,unsigned int requiredcore) {
-  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-  bool retval=gcfindSpareMem_I(requiredmem, requiredcore);
-  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  return retval;
-}
-
 /* This function is performance critical...  spend more time optimizing it */
 
 unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
   void *toptrinit=to->ptr;
-  void *toptr=toptr;
+  void *toptr=toptrinit;
   void *tobound=to->bound;
   void *origptr=orig->ptr;
   void *origbound=orig->bound;
   unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
   unsigned int objlength;
-
   while(origptr<origbound) {
     //Try to skip over stuff fast first
     unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
@@ -150,12 +359,12 @@ unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
     if (!gcmarktbl[arrayoffset]) {
       do {
        arrayoffset++;
-       if (arrayoffset<origendoffset) {
+       if (arrayoffset>=origendoffset) {
          //finished with block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
-         gcheaptop-=(unsigned INTPTR)(toptr-toptrinit)
+         gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
       } while(!gcmarktbl[arrayoffset]);
@@ -167,25 +376,50 @@ unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
 
     if (objlength!=NOTMARKED) {
       unsigned int length=ALIGNSIZETOBYTES(objlength);
+
+      //code between this and next comment should be removed
+#ifdef GC_DEBUG
+      unsigned int size;
+      unsigned int type;
+      gettype_size(origptr, &type, &size);
+      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
+      
+      if (size!=length) {
+       tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
+       unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
+       unsigned INTPTR hibits=alignsize>>4;
+       unsigned INTPTR lobits=(alignsize&15)<<1;
+       tprintf("hibits=%x lobits=%x\n", hibits, lobits);
+       tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
+      }
+#endif
+      //end of code to remove
+
       void *endtoptr=toptr+length;
       if (endtoptr>tobound) {
-       gcheaptop-=(unsigned INTPTR)(toptr-toptrinit)   
+       gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
       }
       //good to move objects and update pointers
+      //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);
+
       gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
+
       origptr+=length;
       toptr=endtoptr;
     } else
       origptr+=ALIGNMENTSIZE;
   }
+  to->ptr=toptr;
+  orig->ptr=origptr;
+  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
+  return 0;
 }
 
 void compact() {
   BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);
-  BAMBOO_CACHE_MF();
   
   // initialize structs for compacting
   struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
@@ -199,25 +433,26 @@ void compact() {
 
 void master_compact() {
   // predict number of blocks to fill for each core
-  void * tmpheaptop = 0;
-  numblockspercore = loadbalance(&tmpheaptop);
+  numblockspercore = loadbalance()+1;
   
   GC_PRINTF("mark phase finished \n");
   
   gc_resetCoreStatus();
   //initialize local data structures first....we don't want remote requests messing data up
   unsigned int initblocks=numblockspercore*NUMCORES4GC;
-  allocationinfo.lowestfreeblock=NOFREEBLOCKS;
+  allocationinfo.lowestfreeblock=NOFREEBLOCK;
 
   //assigned blocks
   for(int i=0;i<initblocks;i++) {
-    allocationinfo.blocktable[i].status=BS_INIT;
+    allocationinfo.blocktable[i].status=BS_USED;
   }
 
   //free blocks
   for(int i=initblocks;i<GCNUMBLOCK;i++) {
     allocationinfo.blocktable[i].status=BS_FREE;
     allocationinfo.blocktable[i].usedspace=0;
+    //this is true because all cores have at least one block already...
+    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
   }
 
   //start all of the cores
@@ -225,6 +460,7 @@ void master_compact() {
     // init some data strutures for compact phase
     gcrequiredmems[i] = 0;
     gccorestatus[i] = 1;
+    returnedmem[i] = 1;
     //send start compact messages to all cores
     if(i != STARTUPCORE) {
       send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
@@ -232,17 +468,21 @@ void master_compact() {
       gcblock2fill = numblockspercore;
     }
   }
-  BAMBOO_CACHE_MF();
   GCPROFILE_ITEM();
   // compact phase
   compact();
   /* wait for all cores to finish compacting */
+  
 
-  while(gc_checkCoreStatus())
+  while(!gc_checkCoreStatus())
     ;
 
   GCPROFILE_ITEM();
 
+  //just in case we didn't get blocks back...
+  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
+    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;
+
   GC_PRINTF("compact phase finished \n");
 }