Restructured the shared memory allocation and fixed multiple bugs in the multicore...
author jzhou <jzhou>
Tue, 27 Oct 2009 20:24:04 +0000 (20:24 +0000)
committer jzhou <jzhou>
Tue, 27 Oct 2009 20:24:04 +0000 (20:24 +0000)
Robust/src/Runtime/multicoregarbage.c
Robust/src/Runtime/multicoregarbage.h
Robust/src/Runtime/multicoreruntime.h
Robust/src/Runtime/multicoretask.c
Robust/src/Runtime/runtime.h
Robust/src/buildscript

index 58491b6225a94642acae8b5204e99c4abc2f395d..78b3cf1dfaeaa7f5cbbb9510600cf91dd98b7bdd 100644 (file)
@@ -62,10 +62,14 @@ inline void dumpSMem() {
        tprintf("++++ reserved sblocks ++++ \n");
        for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
                tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
-            *((int *)(i)), *((int *)(i + 4)), *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
-                                               *((int *)(i + 4*4)), *((int *)(i + 4*5)), *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
-                                               *((int *)(i + 4*8)), *((int *)(i + 4*9)), *((int *)(i + 4*10)), *((int *)(i + 4*11)),
-                                               *((int *)(i + 4*12)), *((int *)(i + 4*13)), *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+            *((int *)(i)), *((int *)(i + 4)), 
+                                               *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
+                                               *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
+                                               *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
+                                               *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
+                                               *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+                                               *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
+                                               *((int *)(i + 4*14)), *((int *)(i + 4*15)));
        }
        sblock = gcreservedsb;
        bool advanceblock = false;
@@ -97,15 +101,20 @@ inline void dumpSMem() {
                        }
                        x = tmpcore/bamboo_width;
                        y = tmpcore%bamboo_width;
-                       tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", block, sblock++, 
-                                           x, y, (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
+                       tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", 
+                                           block, sblock++, x, y, 
+                                                       (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
                }
                j++;
     tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
-            *((int *)(i)), *((int *)(i + 4)), *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
-                                               *((int *)(i + 4*4)), *((int *)(i + 4*5)), *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
-                                               *((int *)(i + 4*8)), *((int *)(i + 4*9)), *((int *)(i + 4*10)), *((int *)(i + 4*11)),
-                                               *((int *)(i + 4*12)), *((int *)(i + 4*13)), *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+            *((int *)(i)), *((int *)(i + 4)), 
+                                               *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
+                                               *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
+                                               *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
+                                               *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
+                                               *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+                                               *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
+                                               *((int *)(i + 4*14)), *((int *)(i + 4*15)));
        }
        tprintf("\n");
 }
@@ -540,6 +549,8 @@ inline void initGC() {
 
        freeRuntimeHash(gcpointertbl);
        gcpointertbl = allocateRuntimeHash(20);
+
+       memset(gcsmemtbl, '\0', sizeof(int)*gcnumblock);
 } // void initGC()
 
 // compute load balance for all cores
@@ -611,7 +622,8 @@ inline bool cacheLObjs() {
        while(gc_lobjmoreItems2()) {
                gc_lobjdequeue2();
                size = gclobjtail2->lengths[gclobjtailindex2 - 1];
-               // set the mark field to 2, indicating that this obj has been moved and need to be flushed
+               // set the mark field to 2, indicating that this obj has been moved and 
+               // need to be flushed
                ((int *)(gclobjtail2->lobjs[gclobjtailindex2-1]))[6] = 2;
                memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2 - 1], size);
                dst += size;
@@ -624,92 +636,69 @@ inline bool cacheLObjs() {
        return true;
 } // void cacheLObjs()
 
-inline struct freeMemItem * updateFreeMemList(int localtop,
-                                                         int localsize,
-                                                                                                                                       struct freeMemItem * listtop,
-                                                                                                                                                                                       int * returntop) {
-       struct freeMemItem * tochange = listtop;
-       struct freeMemItem * tmp = bamboo_free_mem_list->head;
-       bool extendflag = false;
-       struct freeMemItem * ex_tmp = NULL;
-       // check if there is a hole in the block below it
-       while(true) {
-               if(tmp->ptr<localtop) {
-                       if((tmp->ptr+tmp->size) == localtop) {
-                               // extend the hole up to includ this block
-                               tmp->size += localsize;
-                               extendflag = true;
-                               *returntop = tmp->ptr;
-                               break;
-                       } // if((tmp->ptr+tmp->size) == localtop)
-               } else {
-                       break;
-               } // if(tmp->ptr<gcloads[i]) else ...
-               if(tmp == tochange) {
+// NOTE: the free mem chunks should be maintained in an ordered linked list;
+// the listtail param always specifies the current list tail
+
+// update gcsmemtbl to record the current shared mem usage of core `coren`, whose allocation top is `localtop`: every block of that core below the top block is recorded as full, the top block gets its partial load
+void updateSmemTbl(int coren,
+                              int localtop) {
+       int ltopcore = 0;
+       int bound = BAMBOO_SMEM_SIZE_L;
+       BLOCKINDEX(localtop, &ltopcore); // presumably stores the index of the block containing localtop (macro defined elsewhere; confirm)
+       if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
+               bound = BAMBOO_SMEM_SIZE; // at or past the large-block region the block size is BAMBOO_SMEM_SIZE
+       }
+       int load = (localtop-gcbaseva)%bound; // offset of localtop inside its block, i.e. bytes used in the top block
+       int i = 0;
+       int j = 0;
+       int toset = 0;
+       do{
+               toset = gc_core2block[2*coren+i]+124*j; // NOTE(review): 124 is a hard-coded stride, presumably 2*NUMCORES; confirm
+               if(toset < ltopcore) {
+                       gcsmemtbl[toset] = (toset<NUMCORES)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE; // block below the top block: record it as completely used
+               } else if(toset == ltopcore) {
+                       gcsmemtbl[toset] = load; // top block: only `load` bytes are in use
                        break;
                } else {
-                       tmp = tmp->next;
-               } // if(tmp == tochange)
-       } // while(true)
-       if((extendflag) && (tmp != tochange)) {
-               ex_tmp = tmp;
-               tmp = tmp->next;
-       } // if(tmp->ptr<gcloads[i])
-       if(tmp != tochange) {
-               while(true) {
-                       if((localtop+localsize) == tmp->ptr) {
-                               // extend the hole below to include this block
-                               extendflag = true;
-                               if(ex_tmp == NULL) {
-                                       tmp->ptr = localtop;
-                                       tmp->size += localsize;
-                                       *returntop = localtop;
-                               } else {
-                                       ex_tmp->size += tmp->size;
-                                       tmp->ptr = tmp->next->ptr;
-                                       tmp->size = tmp->next->size;
-                                       if(tmp->next == tochange) {
-                                               tochange = tmp;
-                                       }
-                                       ex_tmp = tmp->next;
-                                       tmp->next = tmp->next->next;
-                                       RUNFREE(ex_tmp);
-                                       ex_tmp = NULL;
-                               }
-                               break;
-                       }
-                       if(tmp == tochange) {
-                               break;
-                       } else {
-                               tmp = tmp->next;
-                       } // if(tmp == tochange)
-               } // while(true)
-       } // if(tmp != tochange)
-       if((!extendflag) && (tmp == tochange)) {
-               // add a new item for this block hole
+                       break; // this block index is already past the top block: nothing more to record
+               }
+               i++; // step through the two gc_core2block entries of this core, then advance the stride multiplier j
+               if(i == 2) {
+                       i = 0;
+                       j++;
+               }
+       }while(true);
+} // void updateSmemTbl(int, int)
+// addFreeMemItem: record the free chunk starting at ptr with length size in a free-list item, zero the chunk, and return the item that was filled
+inline struct freeMemItem * addFreeMemItem(int ptr, // start address of the free chunk
+                                                      int size, // length of the chunk in bytes (used as the memset length below)
+                                                                                                                                                                        struct freeMemItem * listtail, // item to fill when *sethead is false; otherwise the item after it is filled
+                                                                                                                                                                        bool* sethead) { // in/out flag: when false, listtail itself is filled and the flag is set to true
+       struct freeMemItem * tochange = listtail;
+       if(*sethead) { // listtail is already in use: advance to its successor, allocating one if there is none
                if(tochange->next == NULL) {
                        tochange->next = 
                                (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
-               }
+               } // if(tochange->next == NULL) -- NOTE(review): next of the new item is not set here; presumably RUNMALLOC returns zeroed memory, confirm
                tochange = tochange->next;
-               tochange->ptr = localtop;
-               tochange->size = localsize;
-               *returntop = localtop;
-       } // if((!extendflag) && (tmp == tochange))
+       } else {
+               *sethead = true;
+       } // if(*sethead) else ...
+       tochange->ptr = ptr;
+       tochange->size = size;
+       BLOCKINDEX(ptr, &(tochange->startblock)); // presumably the index of the block holding the first byte of the chunk (macro defined elsewhere; confirm)
+       BLOCKINDEX(ptr+size-1, &(tochange->endblock)); // same for the last byte of the chunk
+       // zero out all of this spare memory
+       // note: the memory starting from heaptop is meant to be left alone, as it caches large objs;
+       // that cache is zeroed later when the large objs are moved (NOTE(review): the memset below is unconditional; confirm callers never pass that region)
+       memset(tochange->ptr, '\0', tochange->size);
        return tochange;
-} // void updateFreeMemList(int, int, struct freeMemItem *, int *)
+} // struct freeMemItem * addFreeMemItem(int,int,struct freeMemItem*,bool*)
 
 inline void moveLObjs() {
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xea01);
 #endif
-       int remain = 0;
-       int bound = BAMBOO_SMEM_SIZE_L;
-       struct freeMemItem * tochange = bamboo_free_mem_list->head;
-       if(tochange == NULL) {
-               bamboo_free_mem_list->head = tochange = 
-                       (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
-       }
        // find current heap top
        // flush all gcloads to indicate the real heap top on one core
        // previous it represents the next available ptr on a core
@@ -717,22 +706,14 @@ inline void moveLObjs() {
                        && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
                // edge of a block, check if this is exactly the heaptop
                BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
-               gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
-       } else {
-               // in the middle of a block, flush the remaining space in this block
-               // and update it into the free mem list
-               if(gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L))) {
-                       bound = BAMBOO_SMEM_SIZE;
-               }
-               remain = bound - gcloads[0]%bound;
-               tochange->ptr = gcloads[0];
-               tochange->size = remain;
-               // zero out all these spare memory
-               memset(tochange->ptr, '\0', tochange->size);
-       }
-       int tmpheaptop = gcloads[0];
+               gcloads[0]+=(gcfilledblocks[0]>1?
+                               (BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
+       } 
+       updateSmemTbl(0, gcloads[0]);
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+  BAMBOO_DEBUGPRINT(0xea02);
+       BAMBOO_DEBUGPRINT_REG(gcloads[0]);
+       BAMBOO_DEBUGPRINT_REG(gcsmemtbl[0]);
 #endif
        for(int i = 1; i < NUMCORES; i++) {
                int tmptop = 0;
@@ -748,46 +729,37 @@ inline void moveLObjs() {
                        gcloads[i]
                                +=(gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
                        tmptop = gcloads[i];
-               } else {
-                       // in the middle of a block, flush the remaining space in this block
-                       // and update it into the free mem list
-                       if(gcfilledblocks[i] > 0) {
-                               bound = BAMBOO_SMEM_SIZE;
-                       } else {
-                               bound = BAMBOO_SMEM_SIZE_L;
-                       }
-                       remain = bound - gcloads[i]%bound;
-                       // zero out all these spare memory
-                       memset(gcloads[i], '\0', remain);
-                       // update free mem list
-                       tochange = updateFreeMemList(gcloads[i], remain, tochange, &tmptop);
-               } // if((gcfilledblocks[i] > 0)
-
-               if(tmpheaptop < tmptop) {
-                       tmpheaptop = tmptop;
-               }
+               } 
+               updateSmemTbl(i, gcloads[i]);
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT_REG(gcloads[i]);
-               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
+       } // for(int i = 1; i < NUMCORES; i++) {
+
+       // find current heap top
+       // TODO
+       // a bug here: when using local allocation, directly move large objects
+       // to the highest free chunk might not be memory efficient
+       int tmpheaptop = 0;
+       int size = 0;
+       int bound = 0;
+       int i = 0;
+       for(i = gcnumblock-1; i >= 0; i--) {
+               if(gcsmemtbl[i] > 0) {
+                       break;
+               }
        }
-       tochange->ptr = tmpheaptop;
-       tochange->size = gcheaptop - tmpheaptop;
-       // zero out all these spare memory
-       memset(tochange->ptr, '\0', tochange->size);
-       if(bamboo_free_mem_list->tail != tochange) {
-               bamboo_free_mem_list->tail = tochange;
-       }
-       while(tochange->next != NULL) {
-               struct freeMemItem * toremove = tochange->next;
-               tochange->next = toremove->next;
-               RUNFREE(toremove);
+       if(i == -1) {
+               tmpheaptop = gcbaseva;
+       } else {
+               tmpheaptop = gcbaseva+gcsmemtbl[i]+((i<NUMCORES)?(BAMBOO_SMEM_SIZE_L*i):
+                               (BAMBOO_SMEM_SIZE*(i-NUMCORES)+BAMBOO_LARGE_SMEM_BOUND));
        }
        // move large objs from gcheaptop to tmpheaptop
        // write the header first
        int tomove = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea02);
+       BAMBOO_DEBUGPRINT(0xea03);
        BAMBOO_DEBUGPRINT_REG(tomove);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
@@ -797,180 +769,238 @@ inline void moveLObjs() {
                           BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE*sizeof(INTPTR));
        if(tomove == 0) {
                gcheaptop = tmpheaptop;
-               return;
-       }
-       // check how many blocks it acrosses
-       remain = tmpheaptop-gcbaseva;
-       int b = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb;
-       // check the remaining space in this block
-       bound = (BAMBOO_SMEM_SIZE);
-       if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
-               bound = (BAMBOO_SMEM_SIZE_L);
-       }
-       remain = bound - remain%bound;
-
+       } else {
+               // check how many blocks it acrosses
+               int remain = tmpheaptop-gcbaseva;
+               int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; // number of the sblock
+               int b = 0; // number of the block
+               BLOCKINDEX(tmpheaptop, &b);
+               // check the remaining space in this block
+               bound = (BAMBOO_SMEM_SIZE);
+               if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
+                       bound = (BAMBOO_SMEM_SIZE_L);
+               }
+               remain = bound - remain%bound;
+
+#ifdef DEBUG
+               BAMBOO_DEBUGPRINT(0xea04);
+#endif
+               size = 0;
+               int isize = 0;
+               int host = 0;
+               int ptr = 0;
+               int base = tmpheaptop;
+               int cpysize = 0;
+               remain -= BAMBOO_CACHE_LINE_SIZE;
+               tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+               while(gc_lobjmoreItems()) {
+                       ptr = (int)(gc_lobjdequeue(&size, &host));
+                       ALIGNSIZE(size, &isize);
+                       if(remain < isize) {
+                               // this object acrosses blocks
+                               if(cpysize > 0) {
+                                       // close current block, fill its header
+                                       memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
+                                       *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
+                                       gcsmemtbl[b] = cpysize + BAMBOO_CACHE_LINE_SIZE;
+                                       cpysize = 0;
+                                       base = tmpheaptop;
+                                       if(remain == 0) {
+                                               remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
+                                                                                BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+                                       } 
+                                       remain -= BAMBOO_CACHE_LINE_SIZE;
+                                       tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+                                       BLOCKINDEX(tmpheaptop, &b);
+                                       sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
+                               } // if(cpysize > 0)
+
+                               // move the large obj
+                               memcpy(tmpheaptop, gcheaptop, size);
+                               // fill the remaining space with -2 padding
+                               memset(tmpheaptop+size, -2, isize-size);
+                               // zero out original mem caching the lobj
+                               memset(gcheaptop, '\0', size);
+#ifdef DEBUG
+                               BAMBOO_DEBUGPRINT(0xea05);
+                               BAMBOO_DEBUGPRINT_REG(gcheaptop);
+                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+                               BAMBOO_DEBUGPRINT_REG(size);
+                               BAMBOO_DEBUGPRINT_REG(isize);
+                               BAMBOO_DEBUGPRINT_REG(base);
+#endif
+                               gcheaptop += size;
+                               if(host == BAMBOO_NUM_OF_CORE) {
+                                       BAMBOO_START_CRITICAL_SECTION();
+                                       RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
+                                       BAMBOO_CLOSE_CRITICAL_SECTION();
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea03);
+                                       BAMBOO_DEBUGPRINT(0xcdca);
+                                       BAMBOO_DEBUGPRINT_REG(ptr);
+                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-       int size = 0;
-       int isize = 0;
-       int host = 0;
-       int ptr = 0;
-       int base = tmpheaptop;
-       int cpysize = 0;
-       remain -= BAMBOO_CACHE_LINE_SIZE;
-       tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-       while(gc_lobjmoreItems()) {
-               ptr = (int)(gc_lobjdequeue(&size, &host));
-               ALIGNSIZE(size, &isize);
-               if(remain < isize) {
-                       // this object acrosses blocks
-                       if(cpysize > 0) {
-                               // close current block, fill its header
+                               } else {
+                                       // send the original host core with the mapping info
+                                       send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
+#ifdef DEBUG
+                                       BAMBOO_DEBUGPRINT(0xcdcb);
+                                       BAMBOO_DEBUGPRINT_REG(ptr);
+                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+#endif
+                               } // if(host == BAMBOO_NUM_OF_CORE) else ...
+                               tmpheaptop += isize;
+
+                               // set the gcsbstarttbl and gcsmemtbl
+                               int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
+                               for(int k = 1; k < tmpsbs; k++) {
+                                       gcsbstarttbl[sb+k] = (INTPTR)(-1);
+                               }
+                               sb += tmpsbs;
+                               bound = (b<NUMCORES)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+                               BLOCKINDEX(tmpheaptop-1, &tmpsbs);
+                               for(; b < tmpsbs; b++) {
+                                       gcsmemtbl[b] = bound;
+                                       if(b==NUMCORES-1) {
+                                               bound = BAMBOO_SMEM_SIZE;
+                                       }
+                               }
+                               if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
+                                       gcsbstarttbl[sb] = (INTPTR)(-1);
+                                       remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
+                                                                        BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+                                       gcsmemtbl[b] = bound;
+                               } else {
+                                       gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
+                                       remain = tmpheaptop-gcbaseva;
+                                       gcsmemtbl[b] = remain%bound;
+                                       remain = bound - gcsmemtbl[b];
+                               } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
+
+                               // close current block and fill the header
                                memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                               *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
+                               *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
                                cpysize = 0;
                                base = tmpheaptop;
-                               if(remain == 0) {
-                                       remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
-                                                      BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-                               } 
                                remain -= BAMBOO_CACHE_LINE_SIZE;
                                tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-                       } // if(cpysize > 0)
-
-                       // move the large obj
-                       memcpy(tmpheaptop, gcheaptop, size);
-                       // fill the remaining space with -2 padding
-                       memset(tmpheaptop+size, -2, isize-size);
-                       // zero out original mem caching the lobj
-                       memset(gcheaptop, '\0', size);
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xea04);
-                       BAMBOO_DEBUGPRINT_REG(gcheaptop);
-                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-                       BAMBOO_DEBUGPRINT_REG(size);
-                       BAMBOO_DEBUGPRINT_REG(isize);
-#endif
-                       gcheaptop += size;
-                       if(host == BAMBOO_NUM_OF_CORE) {
-                               BAMBOO_START_CRITICAL_SECTION();
-                               RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                               BAMBOO_CLOSE_CRITICAL_SECTION();
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xcdca);
-                               BAMBOO_DEBUGPRINT_REG(ptr);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-#endif
                        } else {
-                               // send the original host core with the mapping info
-                               send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xcdcb);
-                               BAMBOO_DEBUGPRINT_REG(ptr);
+                               remain -= isize;
+                               // move the large obj
+                               memcpy(tmpheaptop, gcheaptop, size);
+                               // fill the remaining space with -2 padding
+                               memset(tmpheaptop+size, -2, isize-size);
+                               // zero out original mem caching the lobj
+                               memset(gcheaptop, '\0', size);
+#ifdef DEBUG
+                               BAMBOO_DEBUGPRINT(0xea06);
+                               BAMBOO_DEBUGPRINT_REG(gcheaptop);
                                BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+                               BAMBOO_DEBUGPRINT_REG(size);
+                               BAMBOO_DEBUGPRINT_REG(isize);
 #endif
-                       } // if(host == BAMBOO_NUM_OF_CORE) else ...
-                       tmpheaptop += isize;
-
-                       // set the gcsbstarttbl
-                       int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
-                       for(int k = 1; k < tmpsbs; k++) {
-                               gcsbstarttbl[b+k] = (INTPTR)(-1);
-                       }
-                       b += tmpsbs;
-                       if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
-                               gcsbstarttbl[b] = (INTPTR)(-1);
-                               remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
-                                                    BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-                       } else {
-                               gcsbstarttbl[b] = (INTPTR)(tmpheaptop);
-                               remain = tmpheaptop-gcbaseva;
-                               int bound = remain<(BAMBOO_LARGE_SMEM_BOUND)?(BAMBOO_SMEM_SIZE_L):(BAMBOO_SMEM_SIZE);
-                               remain = bound - remain%bound;
-                       } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
-
-                       // close current block and fill the header
-                       memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                       *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
-                       cpysize = 0;
-                       base = tmpheaptop;
-                       remain -= BAMBOO_CACHE_LINE_SIZE;
-                       tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-               } else {
-                       remain -= isize;
-                       // move the large obj
-                       memcpy(tmpheaptop, gcheaptop, size);
-                       // fill the remaining space with -2 padding
-                       memset(tmpheaptop+size, -2, isize-size);
-                       // zero out original mem caching the lobj
-                       memset(gcheaptop, '\0', size);
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xea05);
-                       BAMBOO_DEBUGPRINT_REG(gcheaptop);
-                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-                       BAMBOO_DEBUGPRINT_REG(size);
-                       BAMBOO_DEBUGPRINT_REG(isize);
-#endif
-                       gcheaptop += size;
-                       cpysize += isize;
-                       if(host == BAMBOO_NUM_OF_CORE) {
-                               BAMBOO_START_CRITICAL_SECTION();
-                               RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                               BAMBOO_CLOSE_CRITICAL_SECTION();
+                               gcheaptop += size;
+                               cpysize += isize;
+                               if(host == BAMBOO_NUM_OF_CORE) {
+                                       BAMBOO_START_CRITICAL_SECTION();
+                                       RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
+                                       BAMBOO_CLOSE_CRITICAL_SECTION();
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xcdcc);
-                               BAMBOO_DEBUGPRINT_REG(ptr);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+                                       BAMBOO_DEBUGPRINT(0xcdcc);
+                                       BAMBOO_DEBUGPRINT_REG(ptr);
+                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-                       } else {
-                               // send the original host core with the mapping info
-                               send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
+                               } else {
+                                       // send the original host core with the mapping info
+                                       send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xcdcd);
-                               BAMBOO_DEBUGPRINT_REG(ptr);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+                                       BAMBOO_DEBUGPRINT(0xcdcd);
+                                       BAMBOO_DEBUGPRINT_REG(ptr);
+                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-                       } // if(host == BAMBOO_NUM_OF_CORE) else ...
-                       tmpheaptop += isize;
-               } // if(remain < isize) else ...
-       } // while(gc_lobjmoreItems())
-       if(cpysize > 0) {
-               // close current block, fill the header
-               memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-               *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
-       } else {
-               tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
-       }
-       gcheaptop = tmpheaptop;
-       // update the free mem list
-       tochange->size = (BAMBOO_BASE_VA)+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
-       tochange->ptr = gcheaptop;
+                               } // if(host == BAMBOO_NUM_OF_CORE) else ...
+                               tmpheaptop += isize;
+
+                               // update gcsmemtbl
+                               if(gcsmemtbl[b] == 0) {
+                                       // add the header's size
+                                       gcsmemtbl[b] = BAMBOO_CACHE_LINE_SIZE;
+                               }
+                               gcsmemtbl[b] += isize;
+                       } // if(remain < isize) else ...
+               } // while(gc_lobjmoreItems())
+               if(cpysize > 0) {
+                       // close current block, fill the header
+                       memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
+                       *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
+                       gcsmemtbl[b] = cpysize + BAMBOO_CACHE_LINE_SIZE;
+               } else {
+                       tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
+               }
+               gcheaptop = tmpheaptop;
+       } // if(tomove == 0)
+
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea06);
+       BAMBOO_DEBUGPRINT(0xea07);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
 #endif
-} // void moveLObjs()
 
-/*inline void updateFreeMemList() {
+       // update the free mem list
+       // create new free mem list according to gcsmemtbl
+       bool sethead = false;
        struct freeMemItem * tochange = bamboo_free_mem_list->head;
        if(tochange == NULL) {
                bamboo_free_mem_list->head = tochange = 
                        (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
+               tochange->next = NULL;
        }
-       // handle the top of the heap
-       tochange->ptr = gcheaptop;
-       tochange->size = BAMBOO_SHARED_MEM_SIZE + BAMBOO_BASE_VA - gcheaptop;
-       // zero out all these spare memory
-       memset(tochange->ptr, '\0', tochange->size);
-       if(bamboo_free_mem_list->tail != tochange) {
-               bamboo_free_mem_list->tail = tochange;
-               if(bamboo_free_mem_list->tail != NULL) {
-                       RUNFREE(bamboo_free_mem_list->tail);
+       int startptr = 0;
+       size = 0;
+       bound = BAMBOO_SMEM_SIZE_L;
+       for(i = 0; i < gcnumblock; i++) {
+               if(gcsmemtbl[i] < bound) {
+                       if(gcsmemtbl[i] == 0) {
+                               // blank one
+                               if(startptr == 0) {
+                                       // a start of a new free mem chunk
+                                       startptr = gcbaseva+((i<NUMCORES)?(i*BAMBOO_SMEM_SIZE_L)
+                                                       :(BAMBOO_LARGE_SMEM_BOUND+(i-NUMCORES)*BAMBOO_SMEM_SIZE));
+                               } // if(startptr == 0) 
+                               size += bound;
+                       } else {
+                               if(startptr != 0) {
+                                       // the end of previous free mem chunk
+                                       tochange = addFreeMemItem(startptr,size,tochange,&sethead);
+                                       //startptr = 0;
+                                       //size = 0;
+                               }
+                               // start of a new free mem chunk
+                               startptr = gcbaseva+((i<NUMCORES)?(i*BAMBOO_SMEM_SIZE_L)
+                                               :(BAMBOO_LARGE_SMEM_BOUND+(i-NUMCORES)*BAMBOO_SMEM_SIZE))+gcsmemtbl[i];
+                               size = bound-gcsmemtbl[i];
+                       } // if(gcsmemtbl[i] == 0) else
+               } else {
+                       if(startptr != 0) {
+                               // the end of previous free mem chunk
+                               tochange = addFreeMemItem(startptr,size,tochange,&sethead);
+                               startptr = 0;
+                               size = 0;
+                       } // if(startptr != 0) {
+               } // if(gcsmemtbl[i] < bound) else
+               if(i == NUMCORES-1) {
+                       bound = BAMBOO_SMEM_SIZE;
                }
+       } // for(i = 0; i < gcnumblock; i++) {
+       if(startptr != 0) {
+               tochange = addFreeMemItem(startptr, size, tochange, &sethead);
+               startptr = 0;
+               size = 0;
        }
-} // void updateFreeMemList()
-*/
+
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xea08);
+       BAMBOO_DEBUGPRINT_REG(gcheaptop);
+#endif
+} // void moveLObjs()
 
 // enqueue root objs
 inline void tomark(struct garbagelist * stackptr) {
@@ -1309,8 +1339,8 @@ inline void compact2Heaptophelper(int coren,
                *numblocks = gcstopblock[gctopcore];
                *p = gcloads[gctopcore];
                BLOCKINDEX(*p, &b);
-               *remain = (b<NUMCORES)?((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
-                                                                                                 :((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
+               *remain=(b<NUMCORES)?((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
+                                                                                         :((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xd106);
                BAMBOO_DEBUGPRINT_REG(gctopcore);
@@ -1378,53 +1408,6 @@ inline void compact2Heaptop() {
 #endif
 } // void compact2Heaptop()
 
-#if 0
-inline int nextTopcore(int topcore, bool direction) {
-       int nextopcore = topcore;
-       if((NUMCORES == 62) && (nextopcore>5)) {
-               nextopcore += 2;
-       }
-       int x = nextopcore / bamboo_height;
-       int y = nextopcore % bamboo_height;
-       if((direction && (y%2 == 0)) || ((!direction) && (y%2))) {
-               // increase
-               if(x == 7) {
-                       if(direction) {
-                               y++;
-                       } else {
-                               y--;
-                       }
-               } else {
-                       x++;
-               }
-       } else {
-               // decrease
-               if((x == 0) || ((x==1) &&(y==6))) {
-                       if(direction) {
-                               y++;
-                               if(y==6) {
-                                       x = 1;
-                               }
-                       } else {
-                               y--;
-                               if(y==5) {
-                                       x = 0;
-                               }
-                       }
-               } else {
-                       x--;
-               }
-       }
-       nextopcore = x*bamboo_height+y;
-       if(NUMCORES==62) {
-               if(x>0) {
-                       nextopcore -= 2;
-               }
-       }
-       return nextopcore;
-} // int nextTopcore(int topcore, bool direction)
-#endif
-
 inline void resolvePendingMoveRequest() {
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xeb01);
@@ -1571,7 +1554,7 @@ innernextSBlock:
                orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
        } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
                orig->sblockindex += 1;
-       } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound) ...
+       } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
 
        // check if this sblock should be omitted or have special start point
        if(gcsbstarttbl[orig->sblockindex] == -1) {
@@ -1624,10 +1607,6 @@ inline bool initOrig_Dst(struct moveHelper * orig,
        BAMBOO_DEBUGPRINT(0xef01);
        BAMBOO_DEBUGPRINT_REG(to->base);
 #endif
-       /*if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-               to->base += gcreservedsb * BAMBOO_SMEM_SIZE;
-               to->top += gcreservedsb * BAMBOO_SMEM_SIZE;
-       }*/
        to->ptr = to->base + to->offset;
 
        // init the orig ptr
@@ -1635,11 +1614,7 @@ inline bool initOrig_Dst(struct moveHelper * orig,
        orig->base = to->base;
        orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
        orig->blockbase = orig->base;
-       /*if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-               orig->sblockindex = gcreservedsb;
-       } else {*/
-               orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
-       //}
+       orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xef02);
        BAMBOO_DEBUGPRINT_REG(orig->base);
@@ -1740,14 +1715,16 @@ innermoveobj:
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe203);
        BAMBOO_DEBUGPRINT_REG(orig->ptr);
+       BAMBOO_DEBUGPRINT_REG(size);
 #endif
+       ALIGNSIZE(size, &isize); // whether or not the obj is marked, we
+                                // must be able to step across it
        if(mark == 1) {
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xe204);
 #endif
                // marked obj, copy it to current heap top
                // check to see if remaining space is enough
-               ALIGNSIZE(size, &isize);
                if(to->top + isize > to->bound) {
                        // fill -1 indicating the end of this block
                        /*if(to->top != to->bound) {
@@ -1764,7 +1741,8 @@ innermoveobj:
                                return true;
                        } // if(stopblock == to->numblocks)
                } // if(to->top + isize > to->bound)
-               // set the mark field to 2, indicating that this obj has been moved and need to be flushed
+               // set the mark field to 2, indicating that this obj has been moved
+               // and needs to be flushed
                ((int *)(orig->ptr))[6] = 2;
                if(to->ptr != orig->ptr) {
                        memcpy(to->ptr, orig->ptr, size);
@@ -1823,8 +1801,8 @@ inline int assignSpareMem_I(int sourcecore,
                                                                                                          int * startaddr) {
        int b = 0;
        BLOCKINDEX(gcloads[sourcecore], &b);
-       int boundptr = b<NUMCORES?(b+1)*BAMBOO_SMEM_SIZE_L
-               :BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE;
+       int boundptr = (b<NUMCORES)?((b+1)*BAMBOO_SMEM_SIZE_L)
+               :(BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE);
        int remain = boundptr - gcloads[sourcecore];
        int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
        *startaddr = gcloads[sourcecore];
@@ -2141,7 +2119,7 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
 
 inline void flush(struct garbagelist * stackptr) {
        flushRuntimeObj(stackptr);
-       
+
        while(gc_moreItems()) {
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xe301);
@@ -2209,7 +2187,7 @@ inline void flush(struct garbagelist * stackptr) {
                        // restore the mark field, indicating that this obj has been flushed
                        ((int *)(ptr))[6] = 0;
                } // if(((int *)(ptr))[6] == 2)
-       } // while(moi != NULL)
+       } // while(gc_moreItems())
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe308);
 #endif
@@ -2350,7 +2328,7 @@ inline void gc(struct garbagelist * stackptr) {
                }
 
                gcphase = MARKPHASE;
-               // mark phase
+    // mark phase
                while(MARKPHASE == gcphase) {
                        mark(isfirst, stackptr);
                        if(isfirst) {
@@ -2441,13 +2419,14 @@ inline void gc(struct garbagelist * stackptr) {
                bool localcompact = true;
                while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
                        if((!finishcompact) && iscontinue) {
-#ifdef DEBUG
+#ifdef GC_DEBUG
                                BAMBOO_DEBUGPRINT(0xe001);
+                               BAMBOO_DEBUGPRINT_REG(numpbc);
                                BAMBOO_DEBUGPRINT_REG(gcblock2fill);
 #endif
                                finishcompact = compacthelper(orig, to, &filledblocks, 
                                                                          &heaptopptr, &localcompact);
-#ifdef DEBUG
+#ifdef GC_DEBUG
                                BAMBOO_DEBUGPRINT(0xe002);
                                BAMBOO_DEBUGPRINT_REG(finishcompact);
                                BAMBOO_DEBUGPRINT_REG(gctomove);
@@ -2517,7 +2496,7 @@ inline void gc(struct garbagelist * stackptr) {
                } // while(COMPACTPHASE == gcphase) 
 #ifdef GC_DEBUG
                tprintf("prepare to move large objs \n");
-               dumpSMem();
+               //dumpSMem();
 #endif
                // move largeObjs
                moveLObjs();
@@ -2548,15 +2527,6 @@ inline void gc(struct garbagelist * stackptr) {
                } // while(FLUSHPHASE == gcphase)
                gcphase = FINISHPHASE;
 
-/*
-               // need to create free memory list  
-               updateFreeMemList();
-#ifdef GC_DEBUG
-               tprintf("flush phase finished \n");
-               //dumpSMem();
-#endif
-*/
-
                gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
                for(i = 1; i < NUMCORES; ++i) {
                        // send gc finish messages to all cores
index 005baecea0d99ea4d1a1226acb963d7c5b2bb2e7..ff0e857583e4abedcabf697c3da6bb2a99b42807 100644 (file)
@@ -10,7 +10,7 @@
 
 // data structures for GC
 #ifdef GC_DEBUG
-#define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE)
+#define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE * 2)
 #else
 #define BAMBOO_SMEM_SIZE_L (32 * BAMBOO_SMEM_SIZE)
 #endif
@@ -75,16 +75,21 @@ volatile bool gcismapped;
 //          moved or garbage collected.
 INTPTR * gcsbstarttbl;
 int gcreservedsb;  // number of reserved sblock for sbstarttbl
+int gcnumblock; // number of total blocks in the shared mem
 int gcbaseva; // base va for shared memory without reserved sblocks
 
+// table recording the number of used bytes in each block
+// Note: this table resides on master core's local heap
+int * gcsmemtbl;
+
 #define ISSHAREDOBJ(p) \
        (((p)>gcbaseva)&&((p)<(gcbaseva+(BAMBOO_SHARED_MEM_SIZE))))
 
 #define ALIGNSIZE(s, as) \
        (*((int*)as)) = (((s) & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE))
 
-// mapping of pointer to block # (start from 0), here the block # is the global
-// index
+// mapping of pointer to block # (start from 0), here the block # is 
+// the global index
 #define BLOCKINDEX(p, b) \
   { \
                int t = (p) - gcbaseva; \
@@ -107,48 +112,11 @@ int gcbaseva; // base va for shared memory without reserved sblocks
        }\
 }
 
-#if 0
-// mapping of pointer to host core (x,y)
-#define RESIDECORE(p, x, y) \
-  { \
-               if(1 == (NUMCORES)) { \
-                       (*((int*)x)) = 0; \
-                       (*((int*)y)) = 0; \
-               } else { \
-                       int b; \
-                       BLOCKINDEX((p), &b); \
-                       bool reverse = (b / (NUMCORES)) % 2; \
-                       int l = b % (NUMCORES); \
-                       if(reverse) { \
-                               if(62 == (NUMCORES)) { \
-                                       if(l < 14) { \
-                                               l += 1; \
-                                       } else { \
-                                               l += 2; \
-                                       } \
-                               } \
-                               (*((int*)y)) = bamboo_height - 1 - (l / bamboo_width); \
-                       } else { \
-                               if(62 == (NUMCORES)) {\
-                                       if (l > 47) {\
-                                               l += 1; \
-                                       } \
-                               } \
-                               (*((int*)y)) = l / bamboo_width; \
-                       } \
-                       if(((!reverse)&&(*((int*)y))%2) || ((reverse)&&((*((int*)y))%2==0))){ \
-                               (*((int*)x)) = bamboo_width - 1 - (l % bamboo_width); \
-                       } else { \
-                               (*((int*)x)) = (l % bamboo_width); \
-                       } \
-               } \
-       }
-#endif
-
 // NOTE: n starts from 0
-// mapping of heaptop (how many bytes there are in the local heap) to the number of
-// the block
-// the number of the block indicates that the block is the xth block on the local heap
+// mapping of heaptop (how many bytes there are in the local heap) to 
+// the number of the block
+// the number of the block indicates that the block is the xth block on 
+// the local heap
 #define NUMBLOCKS(s, n) \
        if(s < (BAMBOO_SMEM_SIZE_L)) { \
                (*((int*)(n))) = 0; \
@@ -165,45 +133,6 @@ int gcbaseva; // base va for shared memory without reserved sblocks
 
 // mapping of (core #, index of the block) to the global block index
 #define BLOCKINDEX2(c, n) (gc_core2block[(2*(c))+((n)%2)]+(124*((n)/2))) 
-#if 0
-#define BLOCKINDEX2(c, n, b) \
-  { \
-               int x; \
-               int y; \
-               int t; \
-               int cc = c; \
-               if((62 == (NUMCORES)) && (cc > 5)) cc += 2; \
-               x = cc / bamboo_height; \
-               y = cc % bamboo_height; \
-               if((n) % 2) { \
-                       if(y % 2) { \
-                               t = x + (bamboo_width - 1 - y) * bamboo_width; \
-                       } else { \
-                               t = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
-                       } \
-                       if(62 == (NUMCORES)) {\
-                               if(y>5) { \
-                                       t--; \
-                               } else { \
-                                       t -= 2; \
-                               } \
-                       } \
-               } else { \
-                       if(y % 2) { \
-                               t = bamboo_width - 1 - x + y * bamboo_width; \
-                       } else { \
-                               t = x + y * bamboo_width; \
-                       } \
-                       if(62 == (NUMCORES)) { \
-                               if(y > 5) { \
-                                       t--; \
-                               } \
-                       } \
-               } \
-               t += (NUMCORES) * (n); \
-               (*((int*)b)) = t; \
-       }
-#endif
 
 // mapping of (core #, number of the block) to the base pointer of the block
 #define BASEPTR(c, n, p) \
@@ -212,7 +141,8 @@ int gcbaseva; // base va for shared memory without reserved sblocks
                if(b < (NUMCORES)) { \
                        (*((int*)p)) = gcbaseva + b * (BAMBOO_SMEM_SIZE_L); \
                } else { \
-                       (*((int*)p)) = gcbaseva+(BAMBOO_LARGE_SMEM_BOUND)+(b-(NUMCORES))*(BAMBOO_SMEM_SIZE); \
+                       (*((int*)p)) = gcbaseva+(BAMBOO_LARGE_SMEM_BOUND)+ \
+                                      (b-(NUMCORES))*(BAMBOO_SMEM_SIZE); \
                } \
        }
 
index 5a7f861ecf236fe92c96c23a8f3ab2d8d8f428cc..14ed7921b54f0ddd56730feb8cfeba148cb4257b 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef MULTICORE_RUNTIME
 #define MULTICORE_RUNTIME
 
+#ifndef INLINE
+#define INLINE    inline __attribute__((always_inline))
+#endif
+
 ////////////////////////////////////////////////////////////////
 // global variables                                          //
 ///////////////////////////////////////////////////////////////
@@ -213,7 +217,7 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred
 #define BAMBOO_BASE_VA 0xd000000
 #ifdef GC_DEBUG
 #include "structdefs.h"
-#define BAMBOO_NUM_PAGES (NUMCORES*(1+1))
+#define BAMBOO_NUM_PAGES (NUMCORES*(2+3)+5)
 #define BAMBOO_PAGE_SIZE (16 * 16)
 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
 #else
@@ -226,9 +230,23 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred
 #ifdef MULTICORE_GC
 #include "multicoregarbage.h"
 
+typedef enum { // shared-memory allocation strategies (type of bamboo_smem_mode)
+       SMEMLOCAL = 0x0, // 0x0, using local mem only
+       SMEMFIXED,       // 0x1, use local mem in lower address space(1 block only)
+                        //      and global mem in higher address space
+       SMEMMIXED,        // 0x2, like FIXED mode but threshold-controlled (TODO: document the threshold)
+       SMEMGLOBAL,       // 0x3, using global mem only
+       SMEMEND           // NOTE(review): presumably an end marker, not a selectable mode -- confirm
+} SMEMSTRATEGY;
+
+SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED; 
+                              //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
+
 struct freeMemItem {
        INTPTR ptr;
        int size;
+       int startblock;  // NOTE(review): presumably global index of the first block of this chunk -- confirm
+       int endblock;    // NOTE(review): presumably global index of the last block of this chunk -- confirm
        struct freeMemItem * next;
 };
 
@@ -237,16 +255,13 @@ struct freeMemList {
        struct freeMemItem * tail;
 };
 
-volatile bool smemflag;
 struct freeMemList * bamboo_free_mem_list;
-volatile INTPTR bamboo_cur_msp;
-volatile int bamboo_smem_size;
 #else
-volatile bool smemflag;
 volatile mspace bamboo_free_msp;
+#endif
+volatile bool smemflag;
 volatile INTPTR bamboo_cur_msp;
 volatile int bamboo_smem_size;
-#endif
 
 // for test TODO
 int total_num_t6;
@@ -296,11 +311,11 @@ bool reside;
 ////////////////////////////////////////////////////////////
 #ifdef TASK
 #ifdef MULTICORE
-inline void initialization(void) __attribute__((always_inline));
-inline void initCommunication(void) __attribute__((always_inline));
-inline void fakeExecution(void) __attribute__((always_inline));
-inline void terminate(void) __attribute__((always_inline));
-inline void initlock(struct ___Object___ * v) __attribute__((always_inline));
+INLINE void initialization(void);
+INLINE void initCommunication(void);
+INLINE void fakeExecution(void);
+INLINE void terminate(void);
+INLINE void initlock(struct ___Object___ * v);
 
 // lock related functions
 bool getreadlock(void* ptr);
@@ -317,81 +332,81 @@ void releasewritelock_r(void * lock, void * redirectlock);
 // if return -1: the lock request is redirected
 //            0: the lock request is approved
 //            1: the lock request is denied
-inline int processlockrequest(int locktype, 
+INLINE int processlockrequest(int locktype, 
                                          int lock, 
                                                                                                                        int obj, 
                                                                                                                        int requestcore, 
                                                                                                                        int rootrequestcore, 
-                                                                                                                       bool cache) __attribute__((always_inline));
-inline void processlockrelease(int locktype, 
+                                                                                                                       bool cache);
+INLINE void processlockrelease(int locktype, 
                                           int lock, 
                                                                                                                         int redirectlock, 
-                                                                                                                        bool redirect)__attribute__((always_inline));
+                                                                                                                        bool redirect);
 
 // msg related functions
-inline void send_hanging_msg() __attribute__((always_inline));
-inline void send_msg_1(int targetcore, 
-                                  unsigned long n0) __attribute__((always_inline));
-inline void send_msg_2(int targetcore, 
+INLINE void send_hanging_msg();
+INLINE void send_msg_1(int targetcore, 
+                                  unsigned long n0);
+INLINE void send_msg_2(int targetcore, 
                                   unsigned long n0, 
-                                                                                        unsigned long n1) __attribute__((always_inline));
-inline void send_msg_3(int targetcore, 
+                                                                                        unsigned long n1);
+INLINE void send_msg_3(int targetcore, 
                                   unsigned long n0, 
                                                                                         unsigned long n1, 
-                                                                                        unsigned long n2) __attribute__((always_inline));
-inline void send_msg_4(int targetcore, 
+                                                                                        unsigned long n2);
+INLINE void send_msg_4(int targetcore, 
                                   unsigned long n0, 
                                                                                         unsigned long n1, 
                                                                                         unsigned long n2, 
-                                                                                        unsigned long n3) __attribute__((always_inline));
-inline void send_msg_5(int targetcore, 
+                                                                                        unsigned long n3);
+INLINE void send_msg_5(int targetcore, 
                                   unsigned long n0, 
                                                                                         unsigned long n1, 
                                                                                         unsigned long n2, 
                                                                                         unsigned long n3, 
-                                                                                        unsigned long n4) __attribute__((always_inline));
-inline void send_msg_6(int targetcore, 
+                                                                                        unsigned long n4);
+INLINE void send_msg_6(int targetcore, 
                                   unsigned long n0, 
                                                                                         unsigned long n1, 
                                                                                         unsigned long n2, 
                                                                                         unsigned long n3, 
                                                                                         unsigned long n4, 
-                                                                                        unsigned long n5) __attribute__((always_inline));
-inline void cache_msg_2(int targetcore, 
+                                                                                        unsigned long n5);
+INLINE void cache_msg_2(int targetcore, 
                                    unsigned long n0, 
-                                                                                               unsigned long n1) __attribute__((always_inline));
-inline void cache_msg_3(int targetcore, 
+                                                                                               unsigned long n1);
+INLINE void cache_msg_3(int targetcore, 
                                    unsigned long n0, 
                                                                                                unsigned long n1, 
-                                                                                               unsigned long n2) __attribute__((always_inline));
-inline void cache_msg_4(int targetcore, 
+                                                                                               unsigned long n2);
+INLINE void cache_msg_4(int targetcore, 
                                    unsigned long n0, 
                                                                                                unsigned long n1, 
                                                                                                unsigned long n2, 
-                                                                                               unsigned long n3) __attribute__((always_inline));
-inline void cache_msg_5(int targetcore, 
+                                                                                               unsigned long n3);
+INLINE void cache_msg_5(int targetcore, 
                                    unsigned long n0, 
                                                                                                unsigned long n1, 
                                                                                                unsigned long n2, 
                                                                                                unsigned long n3, 
-                                                                                               unsigned long n4) __attribute__((always_inline));
-inline void cache_msg_6(int targetcore, 
+                                                                                               unsigned long n4);
+INLINE void cache_msg_6(int targetcore, 
                                    unsigned long n0, 
                                                                                                unsigned long n1, 
                                                                                                unsigned long n2, 
                                                                                                unsigned long n3, 
                                                                                                unsigned long n4, 
-                                                                                               unsigned long n5) __attribute__((always_inline));
-inline void transferObject(struct transObjInfo * transObj);
-inline int receiveMsg(void) __attribute__((always_inline));
+                                                                                               unsigned long n5);
+INLINE void transferObject(struct transObjInfo * transObj);
+INLINE int receiveMsg(void);
 
 #ifdef MULTICORE_GC
-inline void transferMarkResults() __attribute__((always_inline));
+INLINE void transferMarkResults();
 #endif
 
 #ifdef PROFILE
-inline void profileTaskStart(char * taskname) __attribute__((always_inline));
-inline void profileTaskEnd(void) __attribute__((always_inline));
+INLINE void profileTaskStart(char * taskname);
+INLINE void profileTaskEnd(void);
 void outputProfileData();
 #endif  // #ifdef PROFILE
 ///////////////////////////////////////////////////////////
index 697599e0d26bf006b12fd99f16e05559037bb9e3..d3d62bc30219e2fad3f899fd4e4b736f59e00c9b 100644 (file)
@@ -18,6 +18,24 @@ int enqueuetasks_I(struct parameterwrapper *parameter,
                                                                         int * enterflags, 
                                                                         int numenterflags);
 
+#ifdef MULTICORE_GC
+inline __attribute__((always_inline)) 
+void setupsmemmode(void) { // set bamboo_smem_mode from the SMEML/SMEMF/SMEMM/SMEMG build macro
+#ifdef SMEML
+       bamboo_smem_mode = SMEMLOCAL;
+#elif defined SMEMF
+       bamboo_smem_mode = SMEMFIXED;
+#elif defined SMEMM
+       bamboo_smem_mode = SMEMMIXED;
+#elif defined SMEMG
+       bamboo_smem_mode = SMEMGLOBAL;
+#else
+       // none of the SMEM* macros is defined: use local mode by default
+       bamboo_smem_mode = SMEMLOCAL;
+#endif // SMEML / SMEMF / SMEMM / SMEMG (first one defined wins)
+} // void setupsmemmode(void)
+#endif // MULTICORE_GC
+
 inline __attribute__((always_inline)) 
 void initruntimedata() {
        int i;
@@ -94,6 +112,7 @@ void initruntimedata() {
        gcmovepending = 0;
        gcblock2fill = 0;
        gcsbstarttbl = BAMBOO_BASE_VA;
+       gcsmemtbl = RUNMALLOC_I(sizeof(int)*gcnumblock);
 #else
        // create the lock table, lockresult table and obj queue
   locktable.size = 20;
@@ -1162,42 +1181,208 @@ inline void addNewObjInfo(void * nobj) {
 }
 #endif
 
-void * smemalloc(int size, 
-                            int * allocsize) {
-       void * mem = NULL;
-       int isize = size+(BAMBOO_CACHE_LINE_SIZE);
-       int toallocate = ((size+(BAMBOO_CACHE_LINE_SIZE))>(BAMBOO_SMEM_SIZE)) ? 
-                                    (size+(BAMBOO_CACHE_LINE_SIZE)):(BAMBOO_SMEM_SIZE);
-#ifdef MULTICORE_GC
-       // go through free mem list for suitable blocks
+struct freeMemItem * findFreeMemChunk(int coren, // core whose gc_core2block entries drive the search
+                                                 int isize, // requested size, compared against freemem->size
+                                                 int * tofindb) { // out: block number searched for; returns the chosen list item or NULL
        struct freeMemItem * freemem = bamboo_free_mem_list->head;
        struct freeMemItem * prev = NULL;
+       int i = 0; // selects one of the two gc_core2block entries of this core (cycles 0,1)
+       int j = 0; // incremented each time i wraps around
+       *tofindb = gc_core2block[2*coren+i]+124*j; // NOTE(review): 124 looks like a hard-coded per-round block stride -- confirm against NUMCORES
+       // check available shared mem chunks; NOTE(review): freemem is dereferenced before the NULL test below, so a non-empty list is assumed -- confirm
        do {
-               if(freemem->size >= isize) {
+               int foundsmem = 0; // 0: keep walking, 1: chunk found, 2: give up (no more local mem)
+               switch(bamboo_smem_mode) {
+                       case SMEMLOCAL: {
+                               int startb = freemem->startblock;
+                               int endb = freemem->endblock;
+                               while(startb > *tofindb) { // step *tofindb forward until it is no longer before this chunk's first block
+                                       i++;
+                                       if(2==i) {
+                                               i = 0;
+                                               j++;
+                                       }
+                                       *tofindb = gc_core2block[2*coren+i]+124*j;
+                               } // while(startb > tofindb)
+                               if(startb <= *tofindb) { // always holds once the loop above has exited
+                                       if((endb >= *tofindb) && (freemem->size >= isize)) { // chunk covers the wanted block and is big enough
+                                               foundsmem = 1;
+                                       } else if(*tofindb > gcnumblock-1) {
+                                               // no more local mem
+                                               foundsmem = 2;
+                                       } // if(endb >= tofindb) 
+                               } // if(startb <= tofindb)
+                               break;
+                       }
+
+                       case SMEMFIXED: {
+                               int startb = freemem->startblock;
+                               int endb = freemem->endblock;
+                               if(startb <= *tofindb) { // *tofindb is never advanced in this mode; it stays at the initial value set above
+                                       if((endb >= *tofindb)  && (freemem->size >= isize)) {
+                                               foundsmem = 1;
+                                       } 
+                               } else {
+                                       // use the global mem
+                                       if(((startb > NUMCORES-1) && (freemem->size >= isize)) || 
+                                                       ((endb > NUMCORES-1) && ((freemem->size-
+                                                               (gcbaseva+BAMBOO_LARGE_SMEM_BOUND-freemem->ptr))>=isize))) { // chunk starts at block >= NUMCORES, or its part past gcbaseva+BAMBOO_LARGE_SMEM_BOUND holds isize
+                                               foundsmem = 1;
+                                       }
+                               }
+                               break;
+                       }
+
+                       case SMEMMIXED: {
+                               // TODO not supported yet
+                               BAMBOO_EXIT(0xe001);
+                               break;
+                       }
+
+                       case SMEMGLOBAL: {
+                   foundsmem = (freemem->size >= isize); // first chunk that is large enough wins
+                               break;
+                       }
+                       default:
+                               break;
+               }
+
+               if(1 == foundsmem) {
                        // found one
                        break;
+               } else if (2 == foundsmem) {
+                       // terminate, no more mem
+                       freemem = NULL;
+                       break;
                }
                prev = freemem;
                freemem = freemem->next;
        } while(freemem != NULL);
-       if(freemem != NULL) {
-               mem = (void *)(freemem->ptr);
-               // check the remaining space in this block
-               int remain = (int)(mem-(BAMBOO_BASE_VA));
-               int bound = (BAMBOO_SMEM_SIZE);
-               if(remain < BAMBOO_LARGE_SMEM_BOUND) {
-                       bound = (BAMBOO_SMEM_SIZE_L);
-               }
-               remain = bound - remain%bound;
-               if(remain < isize) {
-                       // this object acrosses blocks
-                       *allocsize = isize;
-               } else {
-                       // round the asigned block to the end of the current block
-                       *allocsize = remain;
-               }
+
+       return freemem; // NULL when the list was exhausted or the search gave up
+} // struct freeMemItem * findFreeMemChunk(int, int, int *)
+
+void * localmalloc(int tofindb, // block number the allocation should be placed in
+                              int isize, // requested size
+                              struct freeMemItem * freemem, // free chunk to carve from; updated (and possibly split) here
+                              int * allocsize) { // out: size actually handed out; returns the start address
+       void * mem = NULL;
+       int startb = freemem->startblock; // (not read again in this function)
+       int endb = freemem->endblock; // (not read again in this function)
+       int tmpptr = gcbaseva+((tofindb<NUMCORES)?tofindb*BAMBOO_SMEM_SIZE_L
+               :BAMBOO_LARGE_SMEM_BOUND+(tofindb-NUMCORES)*BAMBOO_SMEM_SIZE); // start address computed for block tofindb
+       if((freemem->size+freemem->ptr-tmpptr)>=isize) {
+               mem = (tmpptr>freemem->ptr)?((void *)tmpptr):(freemem->ptr); // later of block start and chunk start; NOTE(review): arms mix void* and freemem->ptr -- confirm its type
+       } else {
+               mem = (void *)(freemem->size+freemem->ptr-isize); // not enough room after tmpptr: take the last isize of the chunk
+       }
+       // check the remaining space in this block
+       int remain = (int)(mem-gcbaseva);
+       int bound = (BAMBOO_SMEM_SIZE);
+       if(remain < BAMBOO_LARGE_SMEM_BOUND) {
+               bound = (BAMBOO_SMEM_SIZE_L);
+       }
+       remain = bound - remain%bound; // distance from mem to the next multiple of bound
+       if(remain < isize) {
+               // the request crosses a block boundary: hand out exactly isize
+               *allocsize = isize;
+       } else {
+               // round the assigned size up to the end of the current block; NOTE(review): if mem was placed at the chunk tail this may exceed the chunk -- confirm
+               *allocsize = remain;
+       }
+       if(freemem->ptr == (int)mem) { // case 1: allocation starts at the chunk head, so the chunk shrinks from the front
                freemem->ptr = ((void*)freemem->ptr) + (*allocsize);
                freemem->size -= *allocsize;
+               BLOCKINDEX(freemem->ptr, &(freemem->startblock)); // presumably stores the block number of the new chunk start -- BLOCKINDEX is defined elsewhere
+       } else if((freemem->ptr+freemem->size) == ((int)mem+(*allocsize))) { // case 2: allocation ends exactly at the chunk tail
+               freemem->size -= *allocsize;
+               BLOCKINDEX(((int)mem)-1, &(freemem->endblock));
+       } else { // case 3: allocation lies inside the chunk, so split it into a head part and a tail part
+               struct freeMemItem * tmp = 
+                       (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
+               tmp->ptr = (int)mem+*allocsize; // tail part starts right after the allocation
+               tmp->size = freemem->ptr+freemem->size-(int)mem-*allocsize;
+               BLOCKINDEX(tmp->ptr, &(tmp->startblock));
+               tmp->endblock = freemem->endblock;
+               tmp->next = freemem->next; // link the tail part directly after freemem
+               freemem->next = tmp;
+               freemem->size = (int)mem - freemem->ptr; // freemem keeps the head part
+               BLOCKINDEX(((int)mem-1), &(freemem->endblock));
+       }
+       return mem;
+} // void * localmalloc(int, int, struct freeMemItem *, int *)
+
+void * globalmalloc(int isize, // requested size
+                               struct freeMemItem * freemem, // free chunk to carve from; its ptr and size are updated
+                               int * allocsize) { // out: size actually handed out; returns the old freemem->ptr
+       void * mem = (void *)(freemem->ptr);
+       // check the remaining space in this block
+       int remain = (int)(mem-(BAMBOO_BASE_VA)); // NOTE(review): localmalloc() measures this offset from gcbaseva instead -- confirm which base is intended
+       int bound = (BAMBOO_SMEM_SIZE);
+       if(remain < BAMBOO_LARGE_SMEM_BOUND) {
+               bound = (BAMBOO_SMEM_SIZE_L);
+       }
+       remain = bound - remain%bound; // distance from mem to the next multiple of bound
+       if(remain < isize) {
+               // the request crosses a block boundary: hand out exactly isize
+               *allocsize = isize;
+       } else {
+               // round the assigned size up to the end of the current block
+               *allocsize = remain;
+       }
+       freemem->ptr = ((void*)freemem->ptr) + (*allocsize);
+       freemem->size -= *allocsize; // NOTE(review): startblock is not refreshed here as it is in localmalloc() -- confirm
+       return mem;
+} // void * globalmalloc(int, struct freeMemItem *, int *)
+
+// malloc from the shared memory
+void * smemalloc(int coren,
+                            int size, 
+                            int * allocsize) {
+       void * mem = NULL;
+       int isize = size+(BAMBOO_CACHE_LINE_SIZE);
+       int toallocate = ((size+(BAMBOO_CACHE_LINE_SIZE))>(BAMBOO_SMEM_SIZE)) ? 
+                                    (size+(BAMBOO_CACHE_LINE_SIZE)):(BAMBOO_SMEM_SIZE);
+#ifdef MULTICORE_GC
+       // go through free mem list for suitable chunks
+       int tofindb = 0;
+       struct freeMemItem * freemem = findFreeMemChunk(coren, isize, &tofindb);
+
+       // allocate shared mem if available
+       if(freemem != NULL) {
+               switch(bamboo_smem_mode) {
+                       case SMEMLOCAL: {
+                               mem = localmalloc(tofindb, isize, freemem, allocsize);
+                               break;
+                       }
+
+                       case SMEMFIXED: {
+                               int startb = freemem->startblock;
+                               int endb = freemem->endblock;
+                               if(startb > tofindb) {
+                                       // malloc on global mem
+                                       mem = globalmalloc(isize, freemem, allocsize);
+                               } else {
+                                       // malloc on local mem
+                                       mem = localmalloc(tofindb, isize, freemem, allocsize);
+                               }
+                               break;
+                       }
+
+                       case SMEMMIXED: {
+                               // TODO not supported yet
+                               BAMBOO_EXIT(0xe002);
+                               break;
+                       }
+
+                       case SMEMGLOBAL: {
+                               mem = globalmalloc(isize,freemem, allocsize);
+                               break;
+                       }
+
+                       default:
+                               break;
+               }
        } else {
 #else
        mem = mspace_calloc(bamboo_free_msp, 1, isize);
@@ -1250,12 +1435,12 @@ msg:
                                RUNMALLOC_I(sizeof(struct transObjInfo));
       int k = 0;
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                        BAMBOO_DEBUGPRINT(0xe880);
 #endif
 #endif
       if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                                BAMBOO_EXIT(0xa002);
@@ -1267,13 +1452,13 @@ msg:
       for(k = 0; k < transObj->length; ++k) {
                                transObj->queues[2*k] = msgdata[3+2*k];
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k]);
 #endif
 #endif
                                transObj->queues[2*k+1] = msgdata[3+2*k+1];
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k+1]);
 #endif
 #endif
@@ -1308,14 +1493,14 @@ msg:
       // receive a stall msg
       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive stall msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                                BAMBOO_EXIT(0xa003);
       } 
       if(msgdata[1] < NUMCORES) {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT(0xe881);
 #endif
 #endif
@@ -1357,14 +1542,14 @@ msg:
     case LOCKGROUNT: {
       // receive lock grount msg
       if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                                BAMBOO_EXIT(0xa004);
       } 
       if((lockobj == msgdata[2]) && (lock2require == msgdata[3])) {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT(0xe882);
 #endif
 #endif
@@ -1375,7 +1560,7 @@ msg:
 #endif
                        } else {
                                // conflicts on lockresults
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                                BAMBOO_EXIT(0xa005);
@@ -1386,14 +1571,14 @@ msg:
     case LOCKDENY: {
       // receive lock deny msg
       if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                                BAMBOO_EXIT(0xa006);
       } 
       if((lockobj == msgdata[2]) && (lock2require == msgdata[3])) {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT(0xe883);
 #endif
 #endif
@@ -1404,7 +1589,7 @@ msg:
 #endif
                                } else {
                                // conflicts on lockresults
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                                BAMBOO_EXIT(0xa007);
@@ -1427,7 +1612,7 @@ msg:
                                BAMBOO_EXIT(0xa008);
       }
 #ifdef DEBUG
-#ifndef TILEAR
+#ifndef CLOSE_PRINT
                        BAMBOO_DEBUGPRINT(0xe885);
 #endif
 #endif
@@ -1446,13 +1631,13 @@ msg:
       // receive a profile output finish msg
       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                                // non startup core can not receive profile output finish msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                                BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                                BAMBOO_EXIT(0xa009);
       }
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                        BAMBOO_DEBUGPRINT(0xe886);
 #endif
 #endif
@@ -1492,14 +1677,14 @@ msg:
        case REDIRECTGROUNT: {
                // receive a lock grant msg with redirect info
                if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                        BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                        BAMBOO_EXIT(0xa00a);
                }
                if(lockobj == msgdata[2]) {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT(0xe891);
 #endif
 #endif
@@ -1511,7 +1696,7 @@ msg:
 #endif
                } else {
                  // conflicts on lockresults
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                  BAMBOO_EXIT(0xa00b);
@@ -1522,14 +1707,14 @@ msg:
        case REDIRECTDENY: {
          // receive a lock deny msg with redirect info
          if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                  BAMBOO_EXIT(0xa00c);
          }
                if(lockobj == msgdata[2]) {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT(0xe892);
 #endif
 #endif
@@ -1540,7 +1725,7 @@ msg:
 #endif
                } else {
                  // conflicts on lockresults
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                  BAMBOO_EXIT(0xa00d);
@@ -1564,7 +1749,7 @@ msg:
                } else {
                  // send response msg
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT(0xe887);
 #endif
 #endif
@@ -1585,13 +1770,13 @@ msg:
          // receive a status confirm info
          if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // wrong core to receive such msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                  BAMBOO_EXIT(0xa00f);
                } else {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT(0xe888);
 #endif
 #endif
@@ -1608,7 +1793,7 @@ msg:
        case TERMINATE: {
          // receive a terminate msg
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                BAMBOO_DEBUGPRINT(0xe889);
 #endif
 #endif
@@ -1621,13 +1806,13 @@ msg:
          // receive a shared memory request msg
          if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // wrong core to receive such msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                  BAMBOO_EXIT(0xa010);
                } else {
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT(0xe88a);
 #endif
 #endif
@@ -1638,7 +1823,7 @@ msg:
                        }
 #endif
                        int allocsize = 0;
-                 void * mem = smemalloc(msgdata[1], &allocsize);
+                 void * mem = smemalloc(msgdata[2], msgdata[1], &allocsize);
                        if(mem == NULL) {
                                break;
                        }
@@ -1655,7 +1840,7 @@ msg:
        case MEMRESPONSE: {
                // receive a shared memory response msg
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
          BAMBOO_DEBUGPRINT(0xe88b);
 #endif
 #endif
@@ -1703,7 +1888,7 @@ msg:
        case GCSTART: {
                // receive a start GC msg
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
          BAMBOO_DEBUGPRINT(0xe88c);
 #endif
 #endif
@@ -1729,7 +1914,7 @@ msg:
                // received a init phase finish msg
                if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive this msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb001);
@@ -1747,7 +1932,7 @@ msg:
                // received a mark phase finish msg
                if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive this msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb002);
@@ -1765,7 +1950,7 @@ msg:
                if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive this msg
                  // return -1
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb003);
@@ -1836,7 +2021,7 @@ msg:
                if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive this msg
                  // return -1
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb004);
@@ -1877,7 +2062,7 @@ msg:
                // received a marked phase finish confirm response msg
                if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // wrong core to receive such msg
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                  BAMBOO_EXIT(0xb006);
@@ -1959,7 +2144,7 @@ msg:
                numconfirm--;
 
                if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
-#ifndef TILERA
+#ifndef CLOSE_PRINT
                        BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
                        BAMBOO_EXIT(0xb009);
@@ -1997,7 +2182,7 @@ msg:
        msgtype = -1;
        msglength = 30;
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
        BAMBOO_DEBUGPRINT(0xe88d);
 #endif
 #endif
@@ -2014,7 +2199,7 @@ msg:
 } else {
        // not a whole msg
 #ifdef DEBUG
-#ifndef TILERA
+#ifndef CLOSE_PRINT
        BAMBOO_DEBUGPRINT(0xe88e);
 #endif
 #endif
@@ -2277,7 +2462,6 @@ void executetasks() {
   int andmask=0;
   int checkmask=0;
 
-
 newtask:
   while(hashsize(activetasks)>0) {
 #ifdef MULTICORE_GC
@@ -2296,11 +2480,11 @@ newtask:
 #endif
 #endif
          busystatus = true;
-      currtpd=(struct taskparamdescriptor *) getfirstkey(activetasks);
-      genfreekey(activetasks, currtpd);
+               currtpd=(struct taskparamdescriptor *) getfirstkey(activetasks);
+               genfreekey(activetasks, currtpd);
 
-      numparams=currtpd->task->numParameters;
-      numtotal=currtpd->task->numTotal;
+               numparams=currtpd->task->numParameters;
+               numtotal=currtpd->task->numTotal;
 
          // clear the lockRedirectTbl 
                // (TODO, this table should be empty after all locks are released)
index b34e9a541812c53fb4924c243e54fae9706d5cc8..fca662c426c187f6fb8740ac54ce4666ee7be595 100644 (file)
@@ -138,17 +138,20 @@ extern struct ___Object___ * ___fcrevert___;
 
 #ifdef MULTICORE
 inline void run(void * arg);
+#ifdef MULTICORE_GC
+inline void setupsmemmode(void);
+#endif
 int receiveObject(void);
 void flagorand(void * ptr, int ormask, int andmask, struct parameterwrapper ** queues, int length);
 void flagorandinit(void * ptr, int ormask, int andmask);
-void enqueueObject(void * ptr, struct parameterwrapper ** queues, int length);
+void enqueueObject(void * ptr, struct parameterwrapper ** queues,int length);
 #ifdef PROFILE
 inline void setTaskExitIndex(int index);
 inline void addNewObjInfo(void * nobj);
 #endif
 int * getAliasLock(void ** ptrs, int length, struct RuntimeHash * tbl);
 void addAliasLock(void * ptr, int lock);
-void * smemalloc(int size, int * allocsize);
+void * smemalloc(int coren, int size, int * allocsize);
 #else
 void flagorand(void * ptr, int ormask, int andmask);
 void flagorandinit(void * ptr, int ormask, int andmask);
index 98b24f6abdf9c721937c87bdd6c6a979926b35d8..b6a8b3a562d13d8952272d932f6a0310f16f34cb 100755 (executable)
@@ -602,7 +602,7 @@ cd $TILERADIR
 make clean
 rm ./*
 
-export TILERACFLAGS="-DTASK -DMULTICORE"
+export TILERACFLAGS="-DTASK -DMULTICORE -DCLOSE_PRINT"
 
 if $CACHEFLUSHFLAG
 then # print path