From: jzhou Date: Wed, 13 Oct 2010 00:17:52 +0000 (+0000) Subject: Changes to accommodate the runtime for multicore gc w/o tasks X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5bc912832bd60a6b934b5a542f2c1f5dbf3d09cc;p=IRC.git Changes to accommodate the runtime for multicore gc w/o tasks --- diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.h b/Robust/src/Runtime/bamboo/multicoregarbage.h index 85570d21..bb428395 100644 --- a/Robust/src/Runtime/bamboo/multicoregarbage.h +++ b/Robust/src/Runtime/bamboo/multicoregarbage.h @@ -1,7 +1,7 @@ #ifndef MULTICORE_GARBAGE_H #define MULTICORE_GARBAGE_H #include "multicoregc.h" -#include "multicorehelper.h" // for mappins between core # and block # +#include "multicorehelper.h" // for mappings between core # and block # #include "structdefs.h" #include "MGCHash.h" #include "GCSharedHash.h" diff --git a/Robust/src/Runtime/bamboo/multicoremem.c b/Robust/src/Runtime/bamboo/multicoremem.c new file mode 100644 index 00000000..f0747f8b --- /dev/null +++ b/Robust/src/Runtime/bamboo/multicoremem.c @@ -0,0 +1,692 @@ +#ifdef MULTICORE +#include "runtime_arch.h" +#include "multicoreruntime.h" + +extern int corenum; + +#ifdef MULTICORE_GC +#include "multicorehelper.h" + +#ifdef SMEMF +#define NUM_CORES2TEST 5 +#ifdef GC_1 +int core2test[1][NUM_CORES2TEST] = { + {0, -1, -1, -1, -1} +}; +#elif defined GC_56 +int core2test[56][NUM_CORES2TEST] = { + { 0, -1, 7, -1, 1}, { 1, -1, 8, 0, 2}, { 2, -1, 9, 1, 3}, + { 3, -1, 10, 2, 4}, { 4, -1, 11, 3, 5}, { 5, -1, 12, 4, 6}, + { 6, -1, 13, 5, -1}, { 7, 0, 14, -1, 8}, { 8, 1, 15, 7, 9}, + { 9, 2, 16, 8, 10}, {10, 3, 17, 9, 11}, {11, 4, 18, 10, 12}, + {12, 5, 19, 11, 13}, {13, 6, 20, 12, -1}, {14, 7, 21, -1, 15}, + {15, 8, 22, 14, 16}, {16, 9, 23, 15, 17}, {17, 10, 24, 16, 18}, + {18, 11, 25, 17, 19}, {19, 12, 26, 18, 20}, {20, 13, 27, 19, -1}, + {21, 14, 28, -1, 22}, {22, 15, 29, 21, 23}, {23, 16, 30, 22, 24}, + {24, 17, 31, 23, 25}, {25, 18, 32, 24, 26}, {26, 19, 33, 25, 27}, + 
{27, 20, 34, 26, -1}, {28, 21, 35, -1, 29}, {29, 22, 36, 28, 30}, + {30, 23, 37, 29, 31}, {31, 24, 38, 30, 32}, {32, 25, 39, 31, 33}, + {33, 26, 40, 32, 34}, {34, 27, 41, 33, -1}, {35, 28, 42, -1, 36}, + {36, 29, 43, 35, 37}, {37, 30, 44, 36, 38}, {38, 31, 45, 37, 39}, + {39, 32, 46, 38, 40}, {40, 33, 47, 39, 41}, {41, 34, 48, 40, -1}, + {42, 35, 49, -1, 43}, {43, 36, 50, 42, 44}, {44, 37, 51, 43, 45}, + {45, 38, 52, 44, 46}, {46, 39, 53, 45, 47}, {47, 40, 54, 46, 48}, + {48, 41, 55, 47, -1}, {49, 42, -1, -1, 50}, {50, 43, -1, 49, 51}, + {51, 44, -1, 50, 52}, {52, 45, -1, 51, 53}, {53, 46, -1, 52, 54}, + {54, 47, -1, 53, 55}, {55, 48, -1, 54, -1} +}; +#elif defined GC_62 +// Neighbour table for the 62-core build; each row appears to be +// {core, up, down, left, right} with -1 where no neighbour exists (TODO confirm). +int core2test[62][NUM_CORES2TEST] = { + { 0, -1, 6, -1, 1}, { 1, -1, 7, 0, 2}, { 2, -1, 8, 1, 3}, + { 3, -1, 9, 2, 4}, { 4, -1, 10, 3, 5}, { 5, -1, 11, 4, -1}, + { 6, 0, 14, -1, 7}, { 7, 1, 15, 6, 8}, { 8, 2, 16, 7, 9}, + { 9, 3, 17, 8, 10}, {10, 4, 18, 9, 11}, {11, 5, 19, 10, 12}, + {12, -1, 20, 11, 13}, {13, -1, 21, 12, -1}, {14, 6, 22, -1, 15}, + {15, 7, 23, 14, 16}, {16, 8, 24, 15, 17}, {17, 9, 25, 16, 18}, + {18, 10, 26, 17, 19}, {19, 11, 27, 18, 20}, {20, 12, 28, 19, 21}, + {21, 13, 29, 20, -1}, {22, 14, 30, -1, 23}, {23, 15, 31, 22, 24}, + {24, 16, 32, 23, 25}, {25, 17, 33, 24, 26}, {26, 18, 34, 25, 27}, + {27, 19, 35, 26, 28}, {28, 20, 36, 27, 29}, {29, 21, 37, 28, -1}, + {30, 22, 38, -1, 31}, {31, 23, 39, 30, 32}, {32, 24, 40, 31, 33}, + {33, 25, 41, 32, 34}, {34, 26, 42, 33, 35}, {35, 27, 43, 34, 36}, + {36, 28, 44, 35, 37}, {37, 29, 45, 36, -1}, {38, 30, 46, -1, 39}, + {39, 31, 47, 38, 40}, {40, 32, 48, 39, 41}, {41, 33, 49, 40, 42}, + {42, 34, 50, 41, 43}, {43, 35, 51, 42, 44}, {44, 36, 52, 43, 45}, + {45, 37, 53, 44, -1}, {46, 38, 54, -1, 47}, {47, 39, 55, 46, 48}, + {48, 40, 56, 47, 49}, {49, 41, 57, 48, 50}, {50, 42, 58, 49, 51}, + {51, 43, 59, 50, 52}, {52, 44, 60, 51, 53}, {53, 45, 61, 52, -1}, + {54, 46, -1, -1, 55}, {55, 47, -1, 54, 56}, {56, 48, -1, 55, 57}, + {57, 49, -1, 56, 
58}, {58, 50, -1, 57, 59}, {59, 51, -1, 58, 60}, + {60, 52, -1, 59, 61}, {61, 53, -1, 60, -1} +}; +#endif // GC_1 +#elif defined SMEMM +unsigned int gcmem_mixed_threshold = 0; +unsigned int gcmem_mixed_usedmem = 0; +#define NUM_CORES2TEST 9 +// Mixed-mode neighbour tables; each row appears to be {core, up, down, left, +// right, 2-up, 2-down, 2-left, 2-right} with -1 where none exists (TODO confirm). +#ifdef GC_1 +int core2test[1][NUM_CORES2TEST] = { + {0, -1, -1, -1, -1, -1, -1, -1, -1} +}; +#elif defined GC_56 +int core2test[56][NUM_CORES2TEST] = { + { 0, -1, 7, -1, 1, -1, 14, -1, 2}, + { 1, -1, 8, 0, 2, -1, 15, -1, 3}, + { 2, -1, 9, 1, 3, -1, 16, 0, 4}, + { 3, -1, 10, 2, 4, -1, 17, 1, 5}, + { 4, -1, 11, 3, 5, -1, 18, 2, 6}, + { 5, -1, 12, 4, 6, -1, 19, 3, -1}, + { 6, -1, 13, 5, -1, -1, 20, 4, -1}, + { 7, 0, 14, -1, 8, -1, 21, -1, 9}, + { 8, 1, 15, 7, 9, -1, 22, -1, 10}, + { 9, 2, 16, 8, 10, -1, 23, 7, 11}, + {10, 3, 17, 9, 11, -1, 24, 8, 12}, + {11, 4, 18, 10, 12, -1, 25, 9, 13}, + {12, 5, 19, 11, 13, -1, 26, 10, -1}, + {13, 6, 20, 12, -1, -1, 27, 11, -1}, + {14, 7, 21, -1, 15, 0, 28, -1, 16}, + {15, 8, 22, 14, 16, 1, 29, -1, 17}, + {16, 9, 23, 15, 17, 2, 30, 14, 18}, + {17, 10, 24, 16, 18, 3, 31, 15, 19}, + {18, 11, 25, 17, 19, 4, 32, 16, 20}, + {19, 12, 26, 18, 20, 5, 33, 17, -1}, + {20, 13, 27, 19, -1, 6, 34, 18, -1}, + {21, 14, 28, -1, 22, 7, 35, -1, 23}, + {22, 15, 29, 21, 23, 8, 36, -1, 24}, + {23, 16, 30, 22, 24, 9, 37, 21, 25}, + {24, 17, 31, 23, 25, 10, 38, 22, 26}, + {25, 18, 32, 24, 26, 11, 39, 23, 27}, + {26, 19, 33, 25, 27, 12, 40, 24, -1}, + {27, 20, 34, 26, -1, 13, 41, 25, -1}, + {28, 21, 35, -1, 29, 14, 42, -1, 30}, + {29, 22, 36, 28, 30, 15, 43, -1, 31}, + {30, 23, 37, 29, 31, 16, 44, 28, 32}, + {31, 24, 38, 30, 32, 17, 45, 29, 33}, + {32, 25, 39, 31, 33, 18, 46, 30, 34}, + {33, 26, 40, 32, 34, 19, 47, 31, -1}, + {34, 27, 41, 33, -1, 20, 48, 32, -1}, + {35, 28, 42, -1, 36, 21, 49, -1, 37}, + {36, 29, 43, 35, 37, 22, 50, -1, 38}, + {37, 30, 44, 36, 38, 23, 51, 35, 39}, + {38, 31, 45, 37, 39, 24, 52, 36, 40}, + {39, 32, 46, 38, 40, 25, 53, 37, 41}, + {40, 33, 47, 39, 41, 26, 54, 38, -1}, + {41, 34, 48, 40, 
-1, 27, 55, 39, -1}, + {42, 35, 49, -1, 43, 28, -1, -1, 44}, + {43, 36, 50, 42, 44, 29, -1, -1, 45}, + {44, 37, 51, 43, 45, 30, -1, 42, 46}, + {45, 38, 52, 44, 46, 31, -1, 43, 47}, + {46, 39, 53, 45, 47, 32, -1, 44, 48}, + {47, 40, 54, 46, 48, 33, -1, 45, -1}, + {48, 41, 55, 47, -1, 34, -1, 46, -1}, + {49, 42, -1, -1, 50, 35, -1, -1, 51}, + {50, 43, -1, 49, 51, 36, -1, -1, 52}, + {51, 44, -1, 50, 52, 37, -1, 49, 53}, + {52, 45, -1, 51, 53, 38, -1, 50, 54}, + {53, 46, -1, 52, 54, 39, -1, 51, 55}, + {54, 47, -1, 53, 55, 40, -1, 52, -1}, + {55, 48, -1, 54, -1, 41, -1, 53, -1} +}; +#elif defined GC_62 +// Mixed-mode neighbour table for the 62-core build; rows appear to be {core, up, +// down, left, right, 2-up, 2-down, 2-left, 2-right}, -1 = none (TODO confirm). +int core2test[62][NUM_CORES2TEST] = { + { 0, -1, 6, -1, 1, -1, 14, -1, 2}, + { 1, -1, 7, 0, 2, -1, 15, -1, 3}, + { 2, -1, 8, 1, 3, -1, 16, 0, 4}, + { 3, -1, 9, 2, 4, -1, 17, 1, 5}, + { 4, -1, 10, 3, 5, -1, 18, 2, -1}, + { 5, -1, 11, 4, -1, -1, 19, 3, -1}, + { 6, 0, 14, -1, 7, -1, 22, -1, 8}, + { 7, 1, 15, 6, 8, -1, 23, -1, 9}, + { 8, 2, 16, 7, 9, -1, 24, 6, 10}, + { 9, 3, 17, 8, 10, -1, 25, 7, 11}, + {10, 4, 18, 9, 11, -1, 26, 8, 12}, + {11, 5, 19, 10, 12, -1, 27, 9, 13}, + {12, -1, 20, 11, 13, -1, 28, 10, -1}, + {13, -1, 21, 12, -1, -1, 29, 11, -1}, + {14, 6, 22, -1, 15, 0, 30, -1, 16}, + {15, 7, 23, 14, 16, 1, 31, -1, 17}, + {16, 8, 24, 15, 17, 2, 32, 14, 18}, + {17, 9, 25, 16, 18, 3, 33, 15, 19}, + {18, 10, 26, 17, 19, 4, 34, 16, 20}, + {19, 11, 27, 18, 20, 5, 35, 17, 21}, + {20, 12, 28, 19, 21, -1, 36, 18, -1}, + {21, 13, 29, 20, -1, -1, 37, 19, -1}, + {22, 14, 30, -1, 23, 6, 38, -1, 24}, + {23, 15, 31, 22, 24, 7, 39, -1, 25}, + {24, 16, 32, 23, 25, 8, 40, 22, 26}, + {25, 17, 33, 24, 26, 9, 41, 23, 27}, + {26, 18, 34, 25, 27, 10, 42, 24, 28}, + {27, 19, 35, 26, 28, 11, 43, 25, 29}, + {28, 20, 36, 27, 29, 12, 44, 26, -1}, + {29, 21, 37, 28, -1, 13, 45, 27, -1}, + {30, 22, 38, -1, 31, 14, 46, -1, 32}, + {31, 23, 39, 30, 32, 15, 47, -1, 33}, + {32, 24, 40, 31, 33, 16, 48, 30, 34}, + {33, 25, 41, 32, 34, 17, 49, 31, 35}, + {34, 26, 42, 33, 35, 18, 50, 32, 36}, + {35, 27, 
43, 34, 36, 19, 51, 33, 37}, + {36, 28, 44, 35, 37, 20, 52, 34, -1}, + {37, 29, 45, 36, -1, 21, 53, 35, -1}, + {38, 30, 46, -1, 39, 22, 54, -1, 40}, + {39, 31, 47, 38, 40, 23, 55, -1, 41}, + {40, 32, 48, 39, 41, 24, 56, 38, 42}, + {41, 33, 49, 40, 42, 25, 57, 39, 43}, + {42, 34, 50, 41, 43, 26, 58, 40, 44}, + {43, 35, 51, 42, 44, 27, 59, 41, 45}, + {44, 36, 52, 43, 45, 28, 60, 42, -1}, + {45, 37, 53, 44, -1, 29, 61, 43, -1}, + {46, 38, 54, -1, 47, 30, -1, -1, 48}, + {47, 39, 55, 46, 48, 31, -1, -1, 49}, + {48, 40, 56, 47, 49, 32, -1, 46, 50}, + {49, 41, 57, 48, 50, 33, -1, 47, 51}, + {50, 42, 58, 49, 51, 34, -1, 48, 52}, + {51, 43, 59, 50, 52, 35, -1, 49, 53}, + {52, 44, 60, 51, 53, 36, -1, 50, -1}, + {53, 45, 61, 52, -1, 37, -1, 51, -1}, + {54, 46, -1, -1, 55, 38, -1, -1, 56}, + {55, 47, -1, 54, 56, 39, -1, -1, 57}, + {56, 48, -1, 55, 57, 40, -1, 54, 58}, + {57, 49, -1, 56, 58, 41, -1, 55, 59}, + {58, 50, -1, 57, 59, 42, -1, 56, 60}, + {59, 51, -1, 58, 60, 43, -1, 57, 61}, + {60, 52, -1, 59, 61, 44, -1, 58, -1}, + {61, 53, -1, 60, -1, 45, -1, 59, -1} +}; +#endif // GC_1 +#endif // SMEMF + +// Set bamboo_smem_mode according to whichever of SMEML/SMEMF/SMEMM/SMEMG is +// defined at compile time. +INLINE void setupsmemmode(void) { +#ifdef SMEML + // Only allocate local mem chunks to each core. + // If a core has used up its local shared memory, start gc. + bamboo_smem_mode = SMEMLOCAL; +#elif defined SMEMF + // Allocate the local shared memory to each core with the highest priority, + // if a core has used up its local shared memory, try to allocate the + // shared memory that belong to its neighbours, if also failed, start gc. + bamboo_smem_mode = SMEMFIXED; +#elif defined SMEMM + // Allocate the local shared memory to each core with the highest priority, + // if a core has used up its local shared memory, try to allocate the + // shared memory that belong to its neighbours first, if failed, check + // current memory allocation rate, if it has already reached the threshold, + // start gc, otherwise, allocate the shared memory globally. 
If all the + // shared memory has been used up, start gc. + bamboo_smem_mode = SMEMMIXED; +#elif defined SMEMG + // Allocate all the memory chunks globally, do not consider the host cores + // When all the shared memory are used up, start gc. + bamboo_smem_mode = SMEMGLOBAL; +#else + // defaultly using local mode + bamboo_smem_mode = SMEMLOCAL; +#endif // SMEML +} // void setupsmemmode(void) + +// Only allocate local mem chunks to each core. +// If a core has used up its local shared memory, start gc. +void * localmalloc_I(int coren, + int isize, + int * allocsize) { + void * mem = NULL; + int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC); + int i = 0; + int j = 0; + int tofindb = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j; + int totest = tofindb; + int bound = BAMBOO_SMEM_SIZE_L; + int foundsmem = 0; + int size = 0; + do { + bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; + int nsize = bamboo_smemtbl[totest]; + bool islocal = true; + if(nsize < bound) { + bool tocheck = true; + // have some space in the block + if(totest == tofindb) { + // the first partition + size = bound - nsize; + } else if(nsize == 0) { + // an empty partition, can be appended + size += bound; + } else { + // not an empty partition, can not be appended + // the last continuous block is not big enough, go to check the next + // local block + islocal = true; + tocheck = false; + } // if(totest == tofindb) else if(nsize == 0) else ... + if(tocheck) { + if(size >= isize) { + // have enough space in the block, malloc + foundsmem = 1; + break; + } else { + // no enough space yet, try to append next continuous block + islocal = false; + } // if(size > isize) else ... + } // if(tocheck) + } // if(nsize < bound) + if(islocal) { + // no space in the block, go to check the next block + i++; + if(2==i) { + i = 0; + j++; + } + tofindb = totest = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j; + } else { + totest += 1; + } // if(islocal) else ... 
+ if(totest > gcnumblock-1-bamboo_reserved_smem) { + // no more local mem, do not find suitable block + foundsmem = 2; + break; + } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... + } while(true); + + if(foundsmem == 1) { + // find suitable block + mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb= isize) { + // have enough space in the block, malloc + foundsmem = 1; + break; + } else { + // no enough space yet, try to append next continuous block + // TODO may consider to go to next local block? + islocal = false; + } // if(size > isize) else ... + } // if(tocheck) + } // if(nsize < bound) + if(islocal) { + // no space in the block, go to check the next block + i++; + if(2==i) { + i = 0; + j++; + } + tofindb=totest= + gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; + } else { + totest += 1; + } // if(islocal) else ... + if(totest > gcnumblock-1-bamboo_reserved_smem) { + // no more local mem, do not find suitable block on local mem + // try to malloc shared memory assigned to the neighbour cores + do{ + k++; + if(k >= NUM_CORES2TEST) { + // no more memory available on either coren or its neighbour cores + foundsmem = 2; + goto memsearchresult; + } + } while(core2test[gccorenum][k] == -1); + i = 0; + j = 0; + tofindb=totest= + gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; + } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... + } while(true); + +memsearchresult: + if(foundsmem == 1) { + // find suitable block + mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb= isize) { + // have enough space in the block, malloc + foundsmem = 1; + break; + } else { + // no enough space yet, try to append next continuous block + // TODO may consider to go to next local block? + islocal = false; + } // if(size > isize) else ... 
+ } // if(tocheck) + } // if(nsize < bound) + if(islocal) { + // no space in the block, go to check the next block + i++; + if(2==i) { + i = 0; + j++; + } + tofindb=totest= + gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; + } else { + totest += 1; + } // if(islocal) else ... + if(totest > gcnumblock-1-bamboo_reserved_smem) { + // no more local mem, do not find suitable block on local mem + // try to malloc shared memory assigned to the neighbour cores + do{ + k++; + if(k >= NUM_CORES2TEST) { + if(gcmem_mixed_usedmem >= gcmem_mixed_threshold) { + // no more memory available on either coren or its neighbour cores + foundsmem = 2; + goto memmixedsearchresult; + } else { + // try allocate globally + mem = globalmalloc_I(coren, isize, allocsize); + return mem; + } + } + } while(core2test[gccorenum][k] == -1); + i = 0; + j = 0; + tofindb=totest= + gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; + } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... + } while(true); + +memmixedsearchresult: + if(foundsmem == 1) { + // find suitable block + mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb gcnumblock-1-bamboo_reserved_smem) { + // Out of shared memory + *allocsize = 0; + return NULL; + } + do { + bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; + int nsize = bamboo_smemtbl[totest]; + bool isnext = false; + if(nsize < bound) { + bool tocheck = true; + // have some space in the block + if(totest == tofindb) { + // the first partition + size = bound - nsize; + } else if(nsize == 0) { + // an empty partition, can be appended + size += bound; + } else { + // not an empty partition, can not be appended + // the last continuous block is not big enough, start another block + isnext = true; + tocheck = false; + } // if(totest == tofindb) else if(nsize == 0) else ... 
+ if(tocheck) { + if(size >= isize) { + // have enough space in the block, malloc + foundsmem = 1; + break; + } // if(size > isize) + } // if(tocheck) + } else { + isnext = true; + } // if(nsize < bound) else ... + totest += 1; + if(totest > gcnumblock-1-bamboo_reserved_smem) { + // no more local mem, do not find suitable block + foundsmem = 2; + break; + } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... + if(isnext) { + // start another block + tofindb = totest; + } // if(islocal) + } while(true); + + if(foundsmem == 1) { + // find suitable block + mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb(BAMBOO_SMEM_SIZE)) ? (size) : (BAMBOO_SMEM_SIZE); + if(toallocate > bamboo_free_smem_size) { + // no enough mem + mem = NULL; + } else { + mem = (void *)bamboo_free_smemp; + bamboo_free_smemp = ((void*)bamboo_free_smemp) + toallocate; + bamboo_free_smem_size -= toallocate; + } + *allocsize = toallocate; + if(mem == NULL) { +#endif // MULTICORE_GC + // no enough shared global memory + *allocsize = 0; +#ifdef MULTICORE_GC + if(!gcflag) { + gcflag = true; + // inform other cores to stop and wait for gc + gcprecheck = true; + for(int i = 0; i < NUMCORESACTIVE; i++) { + // reuse the gcnumsendobjs & gcnumreceiveobjs + gccorestatus[i] = 1; + gcnumsendobjs[0][i] = 0; + gcnumreceiveobjs[0][i] = 0; + } + for(int i = 0; i < NUMCORESACTIVE; i++) { + if(i != BAMBOO_NUM_OF_CORE) { + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_1(i, GCSTARTPRE); + } else { + send_msg_1(i, GCSTARTPRE, true); + } + } + } + } + return NULL; +#else + BAMBOO_DEBUGPRINT(0xe003); + BAMBOO_EXIT(0xe003); +#endif + } + return mem; +} // void * smemalloc_I(int, int, int) + +#endif // MULTICORE diff --git a/Robust/src/Runtime/bamboo/multicoremem.h b/Robust/src/Runtime/bamboo/multicoremem.h new file mode 100644 index 00000000..6d124e5e --- /dev/null +++ b/Robust/src/Runtime/bamboo/multicoremem.h @@ -0,0 +1,141 @@ +#ifndef MULTICORE_MEM_H +#define MULTICORE_MEM_H + +#ifndef INTPTR +#ifdef BIT64 +#define INTPTR 
long +#define INTPTRSHIFT 3 +#else +#define INTPTR int +#define INTPTRSHIFT 2 +#endif +#endif + +#ifndef bool +#define bool int +#define true 1 +#define false 0 +#endif + +// data structures for shared memory allocation +#ifdef TILERA_BME +#define BAMBOO_BASE_VA 0xd000000 +#elif defined TILERA_ZLINUX +#ifdef MULTICORE_GC +#define BAMBOO_BASE_VA 0xd000000 +#endif // MULTICORE_GC +#endif // TILERA_BME + +#ifdef BAMBOO_MEMPROF +#define GC_BAMBOO_NUMCORES 56 +#else +#define GC_BAMBOO_NUMCORES 62 +#endif + +// Heap geometry: BAMBOO_SHARED_MEM_SIZE is BAMBOO_SMEM_SIZE * BAMBOO_NUM_BLOCKS +// in every configuration below. +#ifdef GC_DEBUG +#include "structdefs.h" +#define BAMBOO_NUM_BLOCKS (NUMCORES4GC*(2+1)+3) +#define BAMBOO_PAGE_SIZE (64 * 64) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) *(BAMBOO_NUM_BLOCKS)) + +#elif defined GC_CACHE_ADAPT +#ifdef GC_LARGESHAREDHEAP +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+24)) +#else +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+14)) +#endif +#define BAMBOO_PAGE_SIZE (64 * 1024) // 64K default, overridden below +#ifdef GC_LARGEPAGESIZE +#undef BAMBOO_PAGE_SIZE // a macro may not be redefined with a different body +#define BAMBOO_PAGE_SIZE (4 * 64 * 1024) // 256K +#define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE)) +#elif defined GC_SMALLPAGESIZE +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#elif defined GC_SMALLPAGESIZE2 +#undef BAMBOO_PAGE_SIZE +#define BAMBOO_PAGE_SIZE (16 * 1024) // 16K +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#elif defined GC_LARGEPAGESIZE2 +#undef BAMBOO_PAGE_SIZE +#define BAMBOO_PAGE_SIZE (4 * 64 * 1024) // 256K +#define BAMBOO_SMEM_SIZE ((BAMBOO_PAGE_SIZE)) +#else +#define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE)) +#endif // GC_LARGEPAGESIZE +#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) + +#else // GC_DEBUG +#ifdef GC_LARGESHAREDHEAP +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2)) +#elif defined GC_LARGESHAREDHEAP2 +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2)) +#else +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G +#endif +#ifdef GC_LARGEPAGESIZE +#define BAMBOO_PAGE_SIZE (4 * 1024 * 1024) // (4096) 
+#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#elif defined GC_SMALLPAGESIZE +#define BAMBOO_PAGE_SIZE (256 * 1024) // (4096) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#elif defined GC_SMALLPAGESIZE2 +#define BAMBOO_PAGE_SIZE (64 * 1024) // (4096) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#else +#define BAMBOO_PAGE_SIZE (1024 * 1024) // (4096) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#endif // GC_LARGEPAGESIZE +#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) //(1024 * 1024 * 240) //((unsigned long long int)(3.0 * 1024 * 1024 * 1024)) // 3G +#endif // GC_DEBUG + +#ifdef MULTICORE_GC +volatile bool gc_localheap_s; +#endif + +#ifdef MULTICORE_GC +#include "multicoregarbage.h" + +typedef enum { + SMEMLOCAL = 0x0,// 0x0, using local mem only + SMEMFIXED, // 0x1, use local mem in lower address space(1 block only) + // and global mem in higher address space + SMEMMIXED, // 0x2, like FIXED mode but use a threshold to control + SMEMGLOBAL, // 0x3, using global mem only + SMEMEND +} SMEMSTRATEGY; + +SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED; + //-DSMEMM: MIXED; -DSMEMG: GLOBAL; + +struct freeMemItem { + INTPTR ptr; + int size; + int startblock; + int endblock; + struct freeMemItem * next; +}; + +struct freeMemList { + struct freeMemItem * head; + struct freeMemItem * backuplist; // hold removed freeMemItem for reuse; + // only maintain 1 freemMemItem +}; + +// table recording the number of allocated bytes on each block +// Note: this table resides on the bottom of the shared heap for all cores +// to access +volatile int * bamboo_smemtbl; +volatile int bamboo_free_block; +unsigned int bamboo_reserved_smem; // reserved blocks on the top of the shared + // heap e.g. 
20% of the heap and should not + // be allocated otherwise gc is invoked +volatile INTPTR bamboo_smem_zero_top; +#define BAMBOO_SMEM_ZERO_UNIT_SIZE (4 * 1024) // 4KB +#else +//volatile mspace bamboo_free_msp; +INTPTR bamboo_free_smemp; +int bamboo_free_smem_size; +#endif +volatile bool smemflag; +volatile INTPTR bamboo_cur_msp; +volatile int bamboo_smem_size; + +#endif diff --git a/Robust/src/Runtime/bamboo/multicoreruntime.c b/Robust/src/Runtime/bamboo/multicoreruntime.c index 7805ba5b..7e33af83 100644 --- a/Robust/src/Runtime/bamboo/multicoreruntime.c +++ b/Robust/src/Runtime/bamboo/multicoreruntime.c @@ -1,38 +1,35 @@ +#ifdef MULTICORE + #include "runtime.h" +#include "multicoreruntime.h" +#include "runtime_arch.h" +#include "GenericHashtable.h" #include "structdefs.h" #include "mem.h" -#ifndef MULTICORE -#include -#include -#include -#endif #ifndef RAW #include #endif -#ifdef MULTICORE -#include "runtime_arch.h" +#ifdef MGC +#include "thread.h" #endif +#ifndef INLINE +#define INLINE inline __attribute__((always_inline)) +#endif // #ifndef INLINE + extern int classsize[]; extern int typearray[]; extern int typearray2[]; -#ifndef MULTICORE -jmp_buf error_handler; -int instructioncount; -char *options; -int injectfailures=0; -float failurechance=0; -int errors=0; -int injectinstructionfailures; -int failurecount; -float instfailurechance=0; -int numfailures; -int instaccum=0; -#ifdef DMALLOC -#include "dmalloc.h" -#endif +#ifdef TASK +extern struct genhashtable * activetasks; #endif +#ifdef MULTICORE_GC +#ifdef SMEMM +extern unsigned int gcmem_mixed_threshold; +extern unsigned int gcmem_mixed_usedmem; +#endif // SMEMM +#endif // MULTICORE_GC int debugtask=0; @@ -54,60 +51,14 @@ int instanceof(struct ___Object___ *ptr, int type) { return 0; } -#ifdef MULTICORE -void initializeexithandler() { -} -#else -void exithandler(int sig, siginfo_t *info, void * uap) { - BAMBOO_DEBUGPRINT(0xa001); - exit(0); -} - void initializeexithandler() { - struct sigaction sig; - 
sig.sa_sigaction=&exithandler; - sig.sa_flags=SA_SIGINFO; - sigemptyset(&sig.sa_mask); - sigaction(SIGUSR2, &sig, 0); } -#endif /* This function inject failures */ void injectinstructionfailure() { -#ifdef MULTICORE // not supported in MULTICORE version return; -#else -#ifdef TASK - if (injectinstructionfailures) { - if (numfailures==0) - return; - instructioncount=failurecount; - instaccum+=failurecount; - if ((((double)random())/RAND_MAX)0) - numfailures--; - printf("FAILURE!!! %d\n",numfailures); - longjmp(error_handler,11); - } - } -#else -#ifdef THREADS - if (injectinstructionfailures) { - if (numfailures==0) - return; - instaccum+=failurecount; - if ((((double)random())/RAND_MAX)0) - numfailures--; - printf("FAILURE!!! %d\n",numfailures); - threadexit(); - } - } -#endif -#endif -#endif } #ifdef D___Double______nativeparsedouble____L___String___ @@ -147,14 +98,7 @@ int CALL12(___String______convertdoubletochar____D__AR_C, double ___val___, doub #endif void CALL11(___System______exit____I,int ___status___, int ___status___) { -#ifdef MULTICORE BAMBOO_EXIT(___status___); -#else -#ifdef DEBUG - printf("exit in CALL11\n"); -#endif - exit(___status___); -#endif } void CALL23(___Vector______removeElement_____AR_L___Object____I_I, int ___index___, int ___size___, struct ArrayObject * ___array___, int ___index___, int ___size___) { @@ -163,49 +107,24 @@ void CALL23(___Vector______removeElement_____AR_L___Object____I_I, int ___index_ } void CALL11(___System______printI____I,int ___status___, int ___status___) { -#ifdef MULTICORE BAMBOO_DEBUGPRINT(0x1111); BAMBOO_DEBUGPRINT_REG(___status___); -#else -#ifdef DEBUG - printf("printI in CALL11\n"); -#endif - printf("%d\n", ___status___); -#endif } long CALL00(___System______currentTimeMillis____) { -#ifdef MULTICORE // not supported in MULTICORE version return -1; -#else - struct timeval tv; long long retval; - gettimeofday(&tv, NULL); - retval = tv.tv_sec; /* seconds */ - retval*=1000; /* milliseconds */ - retval+= 
(tv.tv_usec/1000); /* adjust milliseconds & add them in */ - return retval; -#endif } void CALL01(___System______printString____L___String___,struct ___String___ * ___s___) { -#ifdef MULTICORE -#else - struct ArrayObject * chararray=VAR(___s___)->___value___; - int i; - int offset=VAR(___s___)->___offset___; - for(i=0; i___count___; i++) { - short sc=((short *)(((char *)&chararray->___length___)+sizeof(int)))[i+offset]; - putchar(sc); - } -#endif } /* Object allocation function */ #ifdef MULTICORE_GC void * allocate_new(void * ptr, int type) { - struct ___Object___ * v=(struct ___Object___ *)FREEMALLOC((struct garbagelist *) ptr, classsize[type]); + struct ___Object___ * v= + (struct ___Object___*)FREEMALLOC((struct garbagelist*) ptr,classsize[type]); v->type=type; v->version = 0; v->lock = NULL; @@ -221,7 +140,9 @@ void * allocate_new(void * ptr, int type) { /* Array allocation function */ struct ArrayObject * allocate_newarray(void * ptr, int type, int length) { - struct ArrayObject * v=(struct ArrayObject *)FREEMALLOC((struct garbagelist *) ptr, sizeof(struct ArrayObject)+length*classsize[type]); + struct ArrayObject * v=(struct ArrayObject *) + FREEMALLOC((struct garbagelist*)ptr, + sizeof(struct ArrayObject)+length*classsize[type]); v->type=type; v->version = 0; v->lock = NULL; @@ -250,7 +171,8 @@ void * allocate_new(int type) { /* Array allocation function */ struct ArrayObject * allocate_newarray(int type, int length) { - struct ArrayObject * v=FREEMALLOC(sizeof(struct ArrayObject)+length*classsize[type]); + struct ArrayObject * v= + FREEMALLOC(sizeof(struct ArrayObject)+length*classsize[type]); v->type=type; v->version = 0; v->lock = NULL; @@ -269,9 +191,11 @@ struct ___String___ * NewString(const char *str,int length) { #endif int i; #ifdef MULTICORE_GC - struct ArrayObject * chararray=allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length); + struct ArrayObject * chararray= + allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length); 
int ptrarray[]={1, (int) ptr, (int) chararray}; - struct ___String___ * strobj=allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE); + struct ___String___ * strobj= + allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE); chararray=(struct ArrayObject *) ptrarray[2]; #else struct ArrayObject * chararray=allocate_newarray(CHARARRAYTYPE, length); @@ -282,7 +206,7 @@ struct ___String___ * NewString(const char *str,int length) { strobj->___offset___=0; for(i=0; i___length___)+sizeof(int)))[i]=(short)str[i]; + ((short*)(((char*)&chararray->___length___)+sizeof(int)))[i]=(short)str[i]; } return strobj; } @@ -319,3 +243,1517 @@ void abort_task() { exit(-1); #endif } + +INLINE void initruntimedata() { + int i; + // initialize the arrays + if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { + // startup core to initialize corestatus[] + for(i = 0; i < NUMCORESACTIVE; ++i) { + corestatus[i] = 1; + numsendobjs[i] = 0; + numreceiveobjs[i] = 0; +#ifdef MULTICORE_GC + gccorestatus[i] = 1; + gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0; + gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0; +#endif + } // for(i = 0; i < NUMCORESACTIVE; ++i) +#ifdef MULTICORE_GC + for(i = 0; i < NUMCORES4GC; ++i) { + gcloads[i] = 0; + gcrequiredmems[i] = 0; + gcstopblock[i] = 0; + gcfilledblocks[i] = 0; + } // for(i = 0; i < NUMCORES4GC; ++i) +#ifdef GC_PROFILE + gc_infoIndex = 0; + gc_infoOverflow = false; + gc_num_livespace = 0; + gc_num_freespace = 0; +#endif +#endif + numconfirm = 0; + waitconfirm = false; + } + + busystatus = true; + self_numsendobjs = 0; + self_numreceiveobjs = 0; + + for(i = 0; i < BAMBOO_MSG_BUF_LENGTH; ++i) { + msgdata[i] = -1; + } + msgdataindex = 0; + msgdatalast = 0; + msglength = BAMBOO_MSG_BUF_LENGTH; + msgdatafull = false; + for(i = 0; i < BAMBOO_OUT_BUF_LENGTH; ++i) { + outmsgdata[i] = -1; + } + outmsgindex = 0; + outmsglast = 0; + outmsgleft = 0; + isMsgHanging = false; + + smemflag = true; + bamboo_cur_msp = NULL; + bamboo_smem_size = 0; + +#ifdef MULTICORE_GC + 
bamboo_smem_zero_top = NULL; + gcflag = false; + gcprocessing = false; + gcphase = FINISHPHASE; + gcprecheck = true; + gccurr_heaptop = 0; + gcself_numsendobjs = 0; + gcself_numreceiveobjs = 0; + gcmarkedptrbound = 0; +#ifdef LOCALHASHTBL_TEST + gcpointertbl = allocateRuntimeHash_I(20); +#else + gcpointertbl = mgchashCreate_I(2000, 0.75); +#endif + gcforwardobjtbl = allocateMGCHash_I(20, 3); + gcobj2map = 0; + gcmappedobj = 0; + gcnumlobjs = 0; + gcheaptop = 0; + gctopcore = 0; + gctopblock = 0; + gcmovestartaddr = 0; + gctomove = false; + gcmovepending = 0; + gcblock2fill = 0; + gcsbstarttbl = BAMBOO_BASE_VA; + bamboo_smemtbl = (void *)gcsbstarttbl + + (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE)*sizeof(INTPTR); + if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) { + int t_size = ((BAMBOO_RMSP_SIZE)-sizeof(mgcsharedhashtbl_t)*2 + -128*sizeof(size_t))/sizeof(mgcsharedhashlistnode_t)-2; + int kk = 0; + unsigned int tmp_k = 1 << (sizeof(int)*8 -1); + while(((t_size & tmp_k) == 0) && (kk < sizeof(int)*8)) { + t_size = t_size << 1; + kk++; + } + t_size = tmp_k >> kk; + gcsharedptbl = mgcsharedhashCreate_I(t_size,0.30); + } else { + gcsharedptbl = NULL; + } + BAMBOO_MEMSET_WH(gcrpointertbls, 0, + sizeof(mgcsharedhashtbl_t *)*NUMCORES4GC); +#ifdef SMEMM + gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE + -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8); + gcmem_mixed_usedmem = 0; +#endif +#ifdef GC_PROFILE + gc_num_obj = 0; + gc_num_liveobj = 0; + gc_num_forwardobj = 0; + gc_num_profiles = NUMCORESACTIVE - 1; +#endif +#ifdef GC_FLUSH_DTLB + gc_num_flush_dtlb = 0; +#endif + gc_localheap_s = false; +#ifdef GC_CACHE_ADAPT + gccachestage = false; +#endif // GC_CACHE_ADAPT +#endif // MULTICORE_GC +#ifndef INTERRUPT + reside = false; +#endif + +#ifdef MGC + // TODO + threadlocks = 0; +#endif + +#ifdef TASK + inittaskdata(); +#endif +} + +INLINE void disruntimedata() { +#ifdef MULTICORE_GC +#ifdef LOCALHASHTBL_TEST + freeRuntimeHash(gcpointertbl); +#else + 
mgchashDelete(gcpointertbl); +#endif + freeMGCHash(gcforwardobjtbl); +#endif // MULTICORE_GC +#ifdef TASK + distaskdata(); +#endif // TASK + BAMBOO_LOCAL_MEM_CLOSE(); + BAMBOO_SHARE_MEM_CLOSE(); +} + +// Check whether every active core is stalled (corestatus[i] == 0) and the +// send/receive object counters balance, then confirm or terminate. +INLINE void checkCoreStatus() { + bool allStall = false; + int i = 0; + int sumsendobj = 0; + if((!waitconfirm) || + (waitconfirm && (numconfirm == 0))) { + BAMBOO_DEBUGPRINT(0xee04); + BAMBOO_DEBUGPRINT_REG(waitconfirm); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); + BAMBOO_DEBUGPRINT(0xf001); + corestatus[BAMBOO_NUM_OF_CORE] = 0; + numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs; + numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs; + // check the status of all cores + allStall = true; + BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE); + for(i = 0; i < NUMCORESACTIVE; ++i) { + BAMBOO_DEBUGPRINT(0xe000 + corestatus[i]); + if(corestatus[i] != 0) { + allStall = false; + break; + } + } // for(i = 0; i < NUMCORESACTIVE; ++i) + if(allStall) { + // check if the sum of send objs and receive obj are the same + // yes->check if the info is the latest; no->go on executing + sumsendobj = 0; + for(i = 0; i < NUMCORESACTIVE; ++i) { + sumsendobj += numsendobjs[i]; + BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]); + } // for(i = 0; i < NUMCORESACTIVE; ++i) + for(i = 0; i < NUMCORESACTIVE; ++i) { + sumsendobj -= numreceiveobjs[i]; + BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]); + } // for(i = 0; i < NUMCORESACTIVE; ++i) + if(0 == sumsendobj) { + if(!waitconfirm) { + // the first time found all cores stall + // send out status confirm msg to all other cores + // reset the corestatus array too + BAMBOO_DEBUGPRINT(0xee05); + corestatus[BAMBOO_NUM_OF_CORE] = 1; + waitconfirm = true; + numconfirm = NUMCORESACTIVE - 1; + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + for(i = 1; i < NUMCORESACTIVE; ++i) { + corestatus[i] = 1; + // send status confirm msg to core i + send_msg_1(i, STATUSCONFIRM, false); + } // for(i = 1; i < NUMCORESACTIVE; ++i) + return; + } else { + // all the core status info are 
the latest + // terminate; for profiling mode, send request to all + // other cores to pour out profiling data + BAMBOO_DEBUGPRINT(0xee06); + +#ifdef USEIO + totalexetime = BAMBOO_GET_EXE_TIME() - bamboo_start_time; +#else + + BAMBOO_PRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time); + //BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test +#ifdef GC_FLUSH_DTLB + BAMBOO_PRINT_REG(gc_num_flush_dtlb); +#endif +#ifndef BAMBOO_MEMPROF + BAMBOO_PRINT(0xbbbbbbbb); +#endif +#endif + // profile mode, send msgs to other cores to request pouring + // out progiling data +#ifdef PROFILE + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + BAMBOO_DEBUGPRINT(0xf000); + for(i = 1; i < NUMCORESACTIVE; ++i) { + // send profile request msg to core i + send_msg_2(i, PROFILEOUTPUT, totalexetime, false); + } // for(i = 1; i < NUMCORESACTIVE; ++i) +#ifndef RT_TEST + // pour profiling data on startup core + outputProfileData(); +#endif + while(true) { + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); + BAMBOO_DEBUGPRINT(0xf001); + profilestatus[BAMBOO_NUM_OF_CORE] = 0; + // check the status of all cores + allStall = true; + BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE); + for(i = 0; i < NUMCORESACTIVE; ++i) { + BAMBOO_DEBUGPRINT(0xe000 + profilestatus[i]); + if(profilestatus[i] != 0) { + allStall = false; + break; + } + } // for(i = 0; i < NUMCORESACTIVE; ++i) + if(!allStall) { + int halt = 100; + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + BAMBOO_DEBUGPRINT(0xf000); + while(halt--) { + } + } else { + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + break; + } // if(!allStall) + } // while(true) +#endif + + // gc_profile mode, output gc prfiling data +#ifdef MULTICORE_GC +#ifdef GC_CACHE_ADAPT + bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt +#endif // GC_CACHE_ADAPT +#ifdef GC_PROFILE + gc_outputProfileData(); +#endif // #ifdef GC_PROFILE +#endif // #ifdef MULTICORE_GC + disruntimedata(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + terminate(); // All done. 
+ } // if(!waitconfirm) + } else { + // still some objects on the fly on the network + // reset the waitconfirm and numconfirm + BAMBOO_DEBUGPRINT(0xee07); + waitconfirm = false; + numconfirm = 0; + } // if(0 == sumsendobj) + } else { + // not all cores are stall, keep on waiting + BAMBOO_DEBUGPRINT(0xee08); + waitconfirm = false; + numconfirm = 0; + } // if(allStall) + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + BAMBOO_DEBUGPRINT(0xf000); + } // if((!waitconfirm) || +} + +// main function for each core +inline void run(void * arg) { + int i = 0; + int argc = 1; + char ** argv = NULL; + bool sendStall = false; + bool isfirst = true; + bool tocontinue = false; + + corenum = BAMBOO_GET_NUM_OF_CORE(); + BAMBOO_DEBUGPRINT(0xeeee); + BAMBOO_DEBUGPRINT_REG(corenum); + BAMBOO_DEBUGPRINT(STARTUPCORE); + + // initialize runtime data structures + initruntimedata(); + + // other architecture related initialization + initialization(); + initCommunication(); + +#ifdef GC_CACHE_ADAPT +// enable the timer interrupt +#ifdef GC_CACHE_SAMPLING + bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); // TODO + bamboo_unmask_timer_intr(); + bamboo_dtlb_sampling_process(); +#endif // GC_CACHE_SAMPLING +#endif // GC_CACHE_ADAPT + + initializeexithandler(); + + // main process of the execution module + if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) { +#ifdef TASK + // non-executing cores, only processing communications + activetasks = NULL; +#endif + fakeExecution(); + } else { +#ifdef TASK + /* Create queue of active tasks */ + activetasks= + genallocatehashtable((unsigned int (*)(void *)) &hashCodetpd, + (int (*)(void *,void *)) &comparetpd); + + /* Process task information */ + processtasks(); + + if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { + /* Create startup object */ + createstartupobject(argc, argv); + } + + BAMBOO_DEBUGPRINT(0xee00); +#endif + + while(true) { + +#ifdef MULTICORE_GC + // check if need to do GC + if(gcflag) { + gc(NULL); + } +#endif // MULTICORE_GC + +#ifdef TASK + 
// check if there are new active tasks can be executed + executetasks(); + if(busystatus) { + sendStall = false; + } + +#ifndef INTERRUPT + while(receiveObject() != -1) { + } +#endif + + BAMBOO_DEBUGPRINT(0xee01); + + // check if there are some pending objects, + // if yes, enqueue them and executetasks again + tocontinue = checkObjQueue(); +#elif defined MGC + // TODO +#endif + + if(!tocontinue) { + // check if stop + if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { + if(isfirst) { + BAMBOO_DEBUGPRINT(0xee03); + isfirst = false; + } + checkCoreStatus(); + } else { + if(!sendStall) { + BAMBOO_DEBUGPRINT(0xee09); +#ifdef PROFILE + if(!stall) { +#endif + if(isfirst) { + // wait for some time + int halt = 10000; + BAMBOO_DEBUGPRINT(0xee0a); + while(halt--) { + } + isfirst = false; + } else { + // send StallMsg to startup core + BAMBOO_DEBUGPRINT(0xee0b); + // send stall msg + send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE, + self_numsendobjs, self_numreceiveobjs, false); + sendStall = true; + isfirst = true; + busystatus = false; + } +#ifdef PROFILE + } +#endif + } else { + isfirst = true; + busystatus = false; + BAMBOO_DEBUGPRINT(0xee0c); + } // if(!sendStall) + } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) + } // if(!tocontinue) + } // while(true) + } // if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) + +} // run() + +INLINE int checkMsgLength_I(int size) { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xcccc); +#endif + int type = msgdata[msgdataindex]; + switch(type) { + case STATUSCONFIRM: + case TERMINATE: +#ifdef MULTICORE_GC + case GCSTARTPRE: + case GCSTARTINIT: + case GCSTART: + case GCSTARTMAPINFO: + case GCSTARTFLUSH: + case GCFINISH: + case GCMARKCONFIRM: + case GCLOBJREQUEST: +#ifdef GC_CACHE_ADAPT + case GCSTARTPREF: +#endif // GC_CACHE_ADAPT +#endif // MULTICORE_GC + { + msglength = 1; + break; + } + +#ifdef TASK + case PROFILEOUTPUT: + case PROFILEFINISH: +#endif +#ifdef MULTICORE_GC + case GCSTARTCOMPACT: + case GCMARKEDOBJ: + case GCFINISHINIT: + case 
GCFINISHMAPINFO: + case GCFINISHFLUSH: +#ifdef GC_CACHE_ADAPT + case GCFINISHPREF: +#endif // GC_CACHE_ADAPT +#endif // MULTICORE_GC + { + msglength = 2; + break; + } + + case MEMREQUEST: + case MEMRESPONSE: +#ifdef MULTICORE_GC + case GCMAPREQUEST: + case GCMAPINFO: + case GCMAPTBL: + case GCLOBJMAPPING: +#endif + { + msglength = 3; + break; + } + + case TRANSTALL: +#ifdef TASK + case LOCKGROUNT: + case LOCKDENY: + case LOCKRELEASE: + case REDIRECTGROUNT: + case REDIRECTDENY: + case REDIRECTRELEASE: +#endif +#ifdef MULTICORE_GC + case GCFINISHPRE: + case GCFINISHMARK: + case GCMOVESTART: +#ifdef GC_PROFILE + case GCPROFILES: +#endif +#endif + { + msglength = 4; + break; + } + +#ifdef TASK + case LOCKREQUEST: +#endif + case STATUSREPORT: +#ifdef MULTICORE_GC + case GCFINISHCOMPACT: + case GCMARKREPORT: +#endif + { + msglength = 5; + break; + } + +#ifdef TASK + case REDIRECTLOCK: + { + msglength = 6; + break; + } +#endif + +#ifdef TASK + case TRANSOBJ: // nonfixed size +#endif +#ifdef MULTICORE_GC + case GCLOBJINFO: +#endif + { // nonfixed size + if(size > 1) { + msglength = msgdata[(msgdataindex+1)&(BAMBOO_MSG_BUF_MASK)]; + } else { + return -1; + } + break; + } + + default: + { + BAMBOO_DEBUGPRINT_REG(type); + BAMBOO_DEBUGPRINT_REG(size); + BAMBOO_DEBUGPRINT_REG(msgdataindex); + BAMBOO_DEBUGPRINT_REG(msgdatalast); + BAMBOO_DEBUGPRINT_REG(msgdatafull); + int i = 6; + while(i-- > 0) { + BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]); + } + BAMBOO_EXIT(0xe004); + break; + } + } +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]); + BAMBOO_DEBUGPRINT(0xffff); +#endif + return msglength; +} + +INLINE void processmsg_transtall_I() { + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive stall msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/); +#endif + BAMBOO_EXIT(0xe006); + } + int num_core = msgdata[msgdataindex]; //[1] + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; //[2]; + MSG_INDEXINC_I(); 
+ int data3 = msgdata[msgdataindex]; //[3]; + MSG_INDEXINC_I(); + if(num_core < NUMCORESACTIVE) { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe881); +#endif + corestatus[num_core] = 0; + numsendobjs[num_core] = data2; //[2]; + numreceiveobjs[num_core] = data3; //[3]; + } +} + +INLINE void processmsg_statusconfirm_I() { + if((BAMBOO_NUM_OF_CORE == STARTUPCORE) + || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) { + // wrong core to receive such msg + BAMBOO_EXIT(0xe011); + } else { + // send response msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe887); +#endif + // cache the msg first + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_5(STARTUPCORE, STATUSREPORT, + busystatus ? 1 : 0, BAMBOO_NUM_OF_CORE, + self_numsendobjs, self_numreceiveobjs); + } else { + send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, + BAMBOO_NUM_OF_CORE, self_numsendobjs, + self_numreceiveobjs, true); + } + } +} + +INLINE void processmsg_statusreport_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data3 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data4 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // receive a status confirm info + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // wrong core to receive such msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data2); +#endif + BAMBOO_EXIT(0xe012); + } else { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe888); +#endif + if(waitconfirm) { + numconfirm--; + } + corestatus[data2] = data1; + numsendobjs[data2] = data3; + numreceiveobjs[data2] = data4; + } +} + +INLINE void processmsg_terminate_I() { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe889); +#endif + disruntimedata(); +#ifdef MULTICORE_GC +#ifdef GC_CACHE_ADAPT + bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt +#endif // GC_CACHE_ADAPT +#endif // MULTICORE_GC + BAMBOO_EXIT_APP(0); +} + +INLINE void processmsg_memrequest_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = 
msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // receive a shared memory request msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // wrong core to receive such msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data2); +#endif + BAMBOO_EXIT(0xe013); + } else { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe88a); +#endif + int allocsize = 0; + void * mem = NULL; +#ifdef MULTICORE_GC + if(gcprocessing) { + // is currently doing gc, dump this msg + if(INITPHASE == gcphase) { + // if still in the initphase of gc, send a startinit msg again, + // cache the msg first + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_1(data2, GCSTARTINIT); + } else { + send_msg_1(data2, GCSTARTINIT, true); + } + } + } else { +#endif + mem = smemalloc_I(data2, data1, &allocsize); + if(mem != NULL) { + // send the start_va to request core, cache the msg first + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_3(data2, MEMRESPONSE, mem, allocsize); + } else { + send_msg_3(data2, MEMRESPONSE, mem, allocsize, true); + } + } //else + // if mem == NULL, the gcflag of the startup core has been set + // and all the other cores have been informed to start gc +#ifdef MULTICORE_GC + } +#endif + } +} + +INLINE void processmsg_memresponse_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // receive a shared memory response msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe88b); +#endif +#ifdef MULTICORE_GC + // if is currently doing gc, dump this msg + if(!gcprocessing) { +#endif + if(data2 == 0) { + bamboo_smem_size = 0; + bamboo_cur_msp = 0; +#ifdef MULTICORE_GC + bamboo_smem_zero_top = 0; +#endif + } else { +#ifdef MULTICORE_GC + // fill header to store the size of this mem block + BAMBOO_MEMSET_WH(data1, '\0', BAMBOO_CACHE_LINE_SIZE); + (*((int*)data1)) = data2; + bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE; + bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE; + bamboo_smem_zero_top = bamboo_cur_msp; +#else + bamboo_smem_size = data2; + 
bamboo_cur_msp =(void*)(data1); +#endif + } + smemflag = true; +#ifdef MULTICORE_GC +} +#endif +} + +#ifdef MULTICORE_GC +INLINE void processmsg_gcstartpre_I() { + if(gcprocessing) { + // already stall for gc + // send a update pregc information msg to the master core + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, + self_numsendobjs, self_numreceiveobjs); + } else { + send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, + self_numsendobjs, self_numreceiveobjs, true); + } + } else { + // the first time to be informed to start gc + gcflag = true; + if(!smemflag) { + // is waiting for response of mem request + // let it return NULL and start gc + bamboo_smem_size = 0; + bamboo_cur_msp = NULL; + smemflag = true; + bamboo_smem_zero_top = NULL; + } + } +} + +INLINE void processmsg_gcstartinit_I() { + gcphase = INITPHASE; +} + +INLINE void processmsg_gcstart_I() { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe88c); +#endif + // set the GC flag + gcphase = MARKPHASE; +} + +INLINE void processmsg_gcstartcompact_I() { + gcblock2fill = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[1]; + gcphase = COMPACTPHASE; +} + +INLINE void processmsg_gcstartmapinfo_I() { + gcphase = MAPPHASE; +} + +INLINE void processmsg_gcstartflush_I() { + gcphase = FLUSHPHASE; +} + +INLINE void processmsg_gcfinishpre_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data3 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a init phase finish msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); +#endif + BAMBOO_EXIT(0xe014); + } + // All cores should do init GC + if(!gcprecheck) { + gcprecheck = true; + } + gccorestatus[data1] = 0; + gcnumsendobjs[0][data1] = data2; + gcnumreceiveobjs[0][data1] = data3; +} + +INLINE void processmsg_gcfinishinit_I() { + int data1 = 
msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a init phase finish msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); +#endif + BAMBOO_EXIT(0xe015); + } +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe88c); + BAMBOO_DEBUGPRINT_REG(data1); +#endif + // All cores should do init GC + if(data1 < NUMCORESACTIVE) { + gccorestatus[data1] = 0; + } +} + +INLINE void processmsg_gcfinishmark_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data3 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a mark phase finish msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); +#endif + BAMBOO_EXIT(0xe016); + } + // all cores should do mark + if(data1 < NUMCORESACTIVE) { + gccorestatus[data1] = 0; + int entry_index = 0; + if(waitconfirm) { + // phase 2 + entry_index = (gcnumsrobjs_index == 0) ? 
1 : 0; + } else { + // phase 1 + entry_index = gcnumsrobjs_index; + } + gcnumsendobjs[entry_index][data1] = data2; + gcnumreceiveobjs[entry_index][data1] = data3; + } +} + +INLINE void processmsg_gcfinishcompact_I() { + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg + // return -1 +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/); +#endif + BAMBOO_EXIT(0xe017); + } + int cnum = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[1]; + int filledblocks = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[2]; + int heaptop = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[3]; + int data4 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[4]; + // only gc cores need to do compact + if(cnum < NUMCORES4GC) { + if(COMPACTPHASE == gcphase) { + gcfilledblocks[cnum] = filledblocks; + gcloads[cnum] = heaptop; + } + if(data4 > 0) { + // ask for more mem + int startaddr = 0; + int tomove = 0; + int dstcore = 0; + if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) { + // cache the msg first + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove); + } else { + send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true); + } + } + } else { + gccorestatus[cnum] = 0; + } // if(data4>0) + } // if(cnum < NUMCORES4GC) +} + +INLINE void processmsg_gcfinishmapinfo_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a map phase finish msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); +#endif + BAMBOO_EXIT(0xe018); + } + // all cores should do flush + if(data1 < NUMCORES4GC) { + gccorestatus[data1] = 0; + } +} + + +INLINE void processmsg_gcfinishflush_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a flush phase finish msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg 
+#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); +#endif + BAMBOO_EXIT(0xe019); + } + // all cores should do flush + if(data1 < NUMCORESACTIVE) { + gccorestatus[data1] = 0; + } +} + +INLINE void processmsg_gcmarkconfirm_I() { + if((BAMBOO_NUM_OF_CORE == STARTUPCORE) + || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) { + // wrong core to receive such msg + BAMBOO_EXIT(0xe01a); + } else { + // send response msg, cahce the msg first + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, + gcbusystatus, gcself_numsendobjs, + gcself_numreceiveobjs); + } else { + send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, + gcbusystatus, gcself_numsendobjs, + gcself_numreceiveobjs, true); + } + } +} + +INLINE void processmsg_gcmarkreport_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data3 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data4 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a marked phase finish confirm response msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // wrong core to receive such msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data2); +#endif + BAMBOO_EXIT(0xe01b); + } else { + int entry_index = 0; + if(waitconfirm) { + // phse 2 + numconfirm--; + entry_index = (gcnumsrobjs_index == 0) ? 
1 : 0; + } else { + // can never reach here + // phase 1 + entry_index = gcnumsrobjs_index; + } + gccorestatus[data1] = data2; + gcnumsendobjs[entry_index][data1] = data3; + gcnumreceiveobjs[entry_index][data1] = data4; + } +} + +INLINE void processmsg_gcmarkedobj_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a markedObj msg + if(((int *)data1)[6] == INIT) { + // this is the first time that this object is discovered, + // set the flag as DISCOVERED + ((int *)data1)[6] = DISCOVERED; + gc_enqueue_I(data1); + } + // set the remote flag + ((int *)data1)[6] |= REMOTEM; + gcself_numreceiveobjs++; + gcbusystatus = true; +} + +INLINE void processmsg_gcmovestart_I() { + gctomove = true; + gcdstcore = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[1]; + gcmovestartaddr = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[2]; + gcblock2fill = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[3]; +} + +INLINE void processmsg_gcmaprequest_I() { + void * dstptr = NULL; + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); +#ifdef LOCALHASHTBL_TEST + RuntimeHashget(gcpointertbl, data1, &dstptr); +#else + dstptr = mgchashSearch(gcpointertbl, data1); +#endif + if(NULL == dstptr) { + // no such pointer in this core, something is wrong +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); + BAMBOO_DEBUGPRINT_REG(data2); +#endif + BAMBOO_EXIT(0xe01c); + } else { + // send back the mapping info, cache the msg first + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr); + } else { + send_msg_3(data2, GCMAPINFO, data1, (int)dstptr, true); + } + } +} + +INLINE void processmsg_gcmapinfo_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + gcmappedobj = msgdata[msgdataindex]; // [2] + MSG_INDEXINC_I(); +#ifdef LOCALHASHTBL_TEST + RuntimeHashadd_I(gcpointertbl, data1, gcmappedobj); +#else + mgchashInsert_I(gcpointertbl, data1, gcmappedobj); +#endif + 
if(data1 == gcobj2map) { + gcismapped = true; + } +} + +INLINE void processmsg_gcmaptbl_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + gcrpointertbls[data2] = (mgcsharedhashtbl_t *)data1; +} + +INLINE void processmsg_gclobjinfo_I() { + numconfirm--; + + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data2); +#endif + BAMBOO_EXIT(0xe01d); + } + // store the mark result info + int cnum = data2; + gcloads[cnum] = msgdata[msgdataindex]; + MSG_INDEXINC_I(); // msgdata[3]; + int data4 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + if(gcheaptop < data4) { + gcheaptop = data4; + } + // large obj info here + for(int k = 5; k < data1; k+=2) { + int lobj = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[k++]; + int length = msgdata[msgdataindex]; + MSG_INDEXINC_I(); //msgdata[k++]; + gc_lobjenqueue_I(lobj, length, cnum); + gcnumlobjs++; + } // for(int k = 5; k < msgdata[1];) +} + +INLINE void processmsg_gclobjmapping_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); +#ifdef LOCALHASHTBL_TEST + RuntimeHashadd_I(gcpointertbl, data1, data2); +#else + mgchashInsert_I(gcpointertbl, data1, data2); +#endif + mgcsharedhashInsert_I(gcsharedptbl, data1, data2); +} + +#ifdef GC_PROFILE +INLINE void processmsg_gcprofiles_I() { + int data1 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data2 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + int data3 = msgdata[msgdataindex]; + MSG_INDEXINC_I(); + gc_num_obj += data1; + gc_num_liveobj += data2; + gc_num_forwardobj += data3; + gc_num_profiles--; +} +#endif // GC_PROFILE + +#ifdef GC_CACHE_ADAPT +INLINE void processmsg_gcstartpref_I() { + gcphase = PREFINISHPHASE; +} + +INLINE void processmsg_gcfinishpref_I() { + int data1 = 
msgdata[msgdataindex]; + MSG_INDEXINC_I(); + // received a flush phase finish msg + if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { + // non startup core can not receive this msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(data1); +#endif + BAMBOO_EXIT(0xe01e); + } + // all cores should do flush + if(data1 < NUMCORESACTIVE) { + gccorestatus[data1] = 0; + } +} +#endif // GC_CACHE_ADAPT +#endif // #ifdef MULTICORE_GC + +// receive object transferred from other cores +// or the terminate message from other cores +// Should be invoked in critical sections!! +// NOTICE: following format is for threadsimulate version only +// RAW version please see previous description +// format: type + object +// type: -1--stall msg +// !-1--object +// return value: 0--received an object +// 1--received nothing +// 2--received a Stall Msg +// 3--received a lock Msg +// RAW version: -1 -- received nothing +// otherwise -- received msg type +int receiveObject(int send_port_pending) { +#ifdef TASK +#ifdef PROFILE_INTERRUPT + if(!interruptInfoOverflow) { + InterruptInfo* intInfo = RUNMALLOC_I(sizeof(struct interrupt_info)); + interruptInfoArray[interruptInfoIndex] = intInfo; + intInfo->startTime = BAMBOO_GET_EXE_TIME(); + intInfo->endTime = -1; + } +#endif // PROFILE_INTERRUPT +#endif // TASK +msg: + // get the incoming msgs + if(receiveMsg(send_port_pending) == -1) { + return -1; + } +processmsg: + // processing received msgs + int size = 0; + MSG_REMAINSIZE_I(&size); + if((size == 0) || (checkMsgLength_I(size) == -1)) { + // not a whole msg + // have new coming msg + if((BAMBOO_MSG_AVAIL() != 0) && !msgdatafull) { + goto msg; + } else { + return -1; + } + } + + if(msglength <= size) { + // have some whole msg + MSGTYPE type; + type = msgdata[msgdataindex]; //[0] + MSG_INDEXINC_I(); + msgdatafull = false; + switch(type) { +#ifdef TASK + case TRANSOBJ: { + // receive a object transfer msg + processmsg_transobj_I(); + break; + } // case TRANSOBJ +#endif // TASK + + case TRANSTALL: { + // receive a 
stall msg + processmsg_transtall_I(); + break; + } // case TRANSTALL + +#ifdef TASK +// GC version have no lock msgs +#ifndef MULTICORE_GC + case LOCKREQUEST: { + // receive lock request msg, handle it right now + processmsg_lockrequest_I(); + break; + } // case LOCKREQUEST + + case LOCKGROUNT: { + // receive lock grount msg + processmsg_lockgrount_I(); + break; + } // case LOCKGROUNT + + case LOCKDENY: { + // receive lock deny msg + processmsg_lockdeny_I(); + break; + } // case LOCKDENY + + case LOCKRELEASE: { + processmsg_lockrelease_I(); + break; + } // case LOCKRELEASE +#endif // #ifndef MULTICORE_GC + +#ifdef PROFILE + case PROFILEOUTPUT: { + // receive an output profile data request msg + processmsg_profileoutput_I(); + break; + } // case PROFILEOUTPUT + + case PROFILEFINISH: { + // receive a profile output finish msg + processmsg_profilefinish_I(); + break; + } // case PROFILEFINISH +#endif // #ifdef PROFILE + +// GC version has no lock msgs +#ifndef MULTICORE_GC + case REDIRECTLOCK: { + // receive a redirect lock request msg, handle it right now + processmsg_redirectlock_I(); + break; + } // case REDIRECTLOCK + + case REDIRECTGROUNT: { + // receive a lock grant msg with redirect info + processmsg_redirectgrount_I(); + break; + } // case REDIRECTGROUNT + + case REDIRECTDENY: { + // receive a lock deny msg with redirect info + processmsg_redirectdeny_I(); + break; + } // case REDIRECTDENY + + case REDIRECTRELEASE: { + // receive a lock release msg with redirect info + processmsg_redirectrelease_I(); + break; + } // case REDIRECTRELEASE +#endif // #ifndef MULTICORE_GC +#endif // TASK + + case STATUSCONFIRM: { + // receive a status confirm info + processmsg_statusconfirm_I(); + break; + } // case STATUSCONFIRM + + case STATUSREPORT: { + processmsg_statusreport_I(); + break; + } // case STATUSREPORT + + case TERMINATE: { + // receive a terminate msg + processmsg_terminate_I(); + break; + } // case TERMINATE + + case MEMREQUEST: { + processmsg_memrequest_I(); + 
break; + } // case MEMREQUEST + + case MEMRESPONSE: { + processmsg_memresponse_I(); + break; + } // case MEMRESPONSE + +#ifdef MULTICORE_GC + // GC msgs + case GCSTARTPRE: { + processmsg_gcstartpre_I(); + break; + } // case GCSTARTPRE + + case GCSTARTINIT: { + processmsg_gcstartinit_I(); + break; + } // case GCSTARTINIT + + case GCSTART: { + // receive a start GC msg + processmsg_gcstart_I(); + break; + } // case GCSTART + + case GCSTARTCOMPACT: { + // a compact phase start msg + processmsg_gcstartcompact_I(); + break; + } // case GCSTARTCOMPACT + + case GCSTARTMAPINFO: { + // received a flush phase start msg + processmsg_gcstartmapinfo_I(); + break; + } // case GCSTARTFLUSH + + case GCSTARTFLUSH: { + // received a flush phase start msg + processmsg_gcstartflush_I(); + break; + } // case GCSTARTFLUSH + + case GCFINISHPRE: { + processmsg_gcfinishpre_I(); + break; + } // case GCFINISHPRE + + case GCFINISHINIT: { + processmsg_gcfinishinit_I(); + break; + } // case GCFINISHINIT + + case GCFINISHMARK: { + processmsg_gcfinishmark_I(); + break; + } // case GCFINISHMARK + + case GCFINISHCOMPACT: { + // received a compact phase finish msg + processmsg_gcfinishcompact_I(); + break; + } // case GCFINISHCOMPACT + + case GCFINISHMAPINFO: { + processmsg_gcfinishmapinfo_I(); + break; + } // case GCFINISHMAPINFO + + case GCFINISHFLUSH: { + processmsg_gcfinishflush_I(); + break; + } // case GCFINISHFLUSH + + case GCFINISH: { + // received a GC finish msg + gcphase = FINISHPHASE; + break; + } // case GCFINISH + + case GCMARKCONFIRM: { + // received a marked phase finish confirm request msg + // all cores should do mark + processmsg_gcmarkconfirm_I(); + break; + } // case GCMARKCONFIRM + + case GCMARKREPORT: { + processmsg_gcmarkreport_I(); + break; + } // case GCMARKREPORT + + case GCMARKEDOBJ: { + processmsg_gcmarkedobj_I(); + break; + } // case GCMARKEDOBJ + + case GCMOVESTART: { + // received a start moving objs msg + processmsg_gcmovestart_I(); + break; + } // case GCMOVESTART + 
+ case GCMAPREQUEST: { + // received a mapping info request msg + processmsg_gcmaprequest_I(); + break; + } // case GCMAPREQUEST + + case GCMAPINFO: { + // received a mapping info response msg + processmsg_gcmapinfo_I(); + break; + } // case GCMAPINFO + + case GCMAPTBL: { + // received a mapping tbl response msg + processmsg_gcmaptbl_I(); + break; + } // case GCMAPTBL + + case GCLOBJREQUEST: { + // received a large objs info request msg + transferMarkResults_I(); + break; + } // case GCLOBJREQUEST + + case GCLOBJINFO: { + // received a large objs info response msg + processmsg_gclobjinfo_I(); + break; + } // case GCLOBJINFO + + case GCLOBJMAPPING: { + // received a large obj mapping info msg + processmsg_gclobjmapping_I(); + break; + } // case GCLOBJMAPPING + +#ifdef GC_PROFILE + case GCPROFILES: { + // received a gcprofiles msg + processmsg_gcprofiles_I(); + break; + } +#endif // GC_PROFILE + +#ifdef GC_CACHE_ADAPT + case GCSTARTPREF: { + // received a gcstartpref msg + processmsg_gcstartpref_I(); + break; + } + + case GCFINISHPREF: { + // received a gcfinishpref msg + processmsg_gcfinishpref_I(); + break; + } +#endif // GC_CACHE_ADAPT +#endif // #ifdef MULTICORE_GC + + default: + break; + } // switch(type) + msglength = BAMBOO_MSG_BUF_LENGTH; + + if((msgdataindex != msgdatalast) || (msgdatafull)) { + // still have available msg + goto processmsg; + } +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe88d); +#endif + + // have new coming msg + if(BAMBOO_MSG_AVAIL() != 0) { + goto msg; + } // TODO + +#ifdef TASK +#ifdef PROFILE_INTERRUPT + if(!interruptInfoOverflow) { + interruptInfoArray[interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME(); + interruptInfoIndex++; + if(interruptInfoIndex == INTERRUPTINFOLENGTH) { + interruptInfoOverflow = true; + } + } +#endif +#endif // TASK + return (int)type; + } else { + // not a whole msg +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT(0xe88e); +#endif + return -2; + } +} + +#endif // MULTICORE diff --git 
a/Robust/src/Runtime/bamboo/multicoreruntime.h b/Robust/src/Runtime/bamboo/multicoreruntime.h index da4209c1..dd423081 100644 --- a/Robust/src/Runtime/bamboo/multicoreruntime.h +++ b/Robust/src/Runtime/bamboo/multicoreruntime.h @@ -1,5 +1,7 @@ #ifndef MULTICORE_RUNTIME #define MULTICORE_RUNTIME +#include "structdefs.h" +#include "Queue.h" #ifndef INLINE #define INLINE inline __attribute__((always_inline)) @@ -17,6 +19,11 @@ // record the starting time unsigned long long bamboo_start_time; +bool stall; +int totalexetime; +#ifndef INTERRUPT +bool reside; +#endif // data structures for msgs #define BAMBOO_OUT_BUF_LENGTH 2048 @@ -218,14 +225,14 @@ typedef enum { GCLOBJMAPPING, // 0xF8 #ifdef GC_PROFILE GCPROFILES, // 0xF9 -#endif +#endif // GC_PROFILE #ifdef GC_CACHE_ADAPT GCSTARTPOSTINIT, // 0xFa GCSTARTPREF, // 0xFb GCFINISHPOSTINIT, // 0xFc GCFINISHPREF, // 0xFd #endif // GC_CACHE_ADAPT -#endif +#endif // MULTICORE_GC MSGEND } MSGTYPE; @@ -257,6 +264,8 @@ bool busystatus; int self_numsendobjs; int self_numreceiveobjs; +// TASK specific data structures +#ifdef TASK // get rid of lock msgs for GC version #ifndef MULTICORE_GC // data structures for locking @@ -264,7 +273,7 @@ struct RuntimeHash locktable; static struct RuntimeHash* locktbl = &locktable; struct RuntimeHash * lockRedirectTbl; struct RuntimeHash * objRedirectLockTbl; -#endif +#endif // ifndef MULTICORE_GC struct LockValue { int redirectlock; int value; @@ -278,142 +287,17 @@ bool lockflag; struct Queue objqueue; struct Queue * totransobjqueue; // queue to hold objs to be transferred // should be cleared whenever enter a task - -// data structures for shared memory allocation -#ifdef TILERA_BME -#define BAMBOO_BASE_VA 0xd000000 -#elif defined TILERA_ZLINUX -#ifdef MULTICORE_GC -#define BAMBOO_BASE_VA 0xd000000 -#endif // MULTICORE_GC -#endif // TILERA_BME - -#ifdef BAMBOO_MEMPROF -#define GC_BAMBOO_NUMCORES 56 -#else -#define GC_BAMBOO_NUMCORES 62 -#endif - -#ifdef GC_DEBUG -#include "structdefs.h" 
-#define BAMBOO_NUM_BLOCKS (NUMCORES4GC*(2+1)+3) -#define BAMBOO_PAGE_SIZE (64 * 64) -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) *(BAMBOO_NUM_BLOCKS)) - -#elif defined GC_CACHE_ADAPT -#ifdef GC_LARGESHAREDHEAP -#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+24)) -#else -#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+14)) -#endif -#define BAMBOO_PAGE_SIZE (64 * 1024) // 64K -#ifdef GC_LARGEPAGESIZE -#define BAMBOO_PAGE_SIZE (4 * 64 * 1024) -#define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE)) -#elif defined GC_SMALLPAGESIZE -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#elif defined GC_SMALLPAGESIZE2 -#define BAMBOO_PAGE_SIZE (16 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#elif defined GC_LARGEPAGESIZE2 -#define BAMBOO_PAGE_SIZE (4 * 64 * 1024) // 64K -#define BAMBOO_SMEM_SIZE ((BAMBOO_PAGE_SIZE)) -#else -#define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE)) -#endif // GC_LARGEPAGESIZE -#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) - -#else // GC_DEBUG -#ifdef GC_LARGESHAREDHEAP -#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2)) -#elif defined GC_LARGESHAREDHEAP2 -#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2)) -#else -#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G -#endif -#ifdef GC_LARGEPAGESIZE -#define BAMBOO_PAGE_SIZE (4 * 1024 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#elif defined GC_SMALLPAGESIZE -#define BAMBOO_PAGE_SIZE (256 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#elif defined GC_SMALLPAGESIZE2 -#define BAMBOO_PAGE_SIZE (64 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#else -#define BAMBOO_PAGE_SIZE (1024 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) -#endif // GC_LARGEPAGESIZE -#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) //(1024 * 1024 * 240) //((unsigned long long 
int)(3.0 * 1024 * 1024 * 1024)) // 3G -#endif // GC_DEBUG - -#ifdef MULTICORE_GC -volatile bool gc_localheap_s; -#endif - -#ifdef MULTICORE_GC -#include "multicoregarbage.h" - -typedef enum { - SMEMLOCAL = 0x0,// 0x0, using local mem only - SMEMFIXED, // 0x1, use local mem in lower address space(1 block only) - // and global mem in higher address space - SMEMMIXED, // 0x2, like FIXED mode but use a threshold to control - SMEMGLOBAL, // 0x3, using global mem only - SMEMEND -} SMEMSTRATEGY; - -SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED; - //-DSMEMM: MIXED; -DSMEMG: GLOBAL; - -struct freeMemItem { - INTPTR ptr; - int size; - int startblock; - int endblock; - struct freeMemItem * next; -}; - -struct freeMemList { - struct freeMemItem * head; - struct freeMemItem * backuplist; // hold removed freeMemItem for reuse; - // only maintain 1 freemMemItem -}; - -// table recording the number of allocated bytes on each block -// Note: this table resides on the bottom of the shared heap for all cores -// to access -volatile int * bamboo_smemtbl; -volatile int bamboo_free_block; -unsigned int bamboo_reserved_smem; // reserved blocks on the top of the shared - // heap e.g. 
20% of the heap and should not - // be allocated otherwise gc is invoked -volatile INTPTR bamboo_smem_zero_top; -#define BAMBOO_SMEM_ZERO_UNIT_SIZE (4 * 1024) // 4KB -#else -//volatile mspace bamboo_free_msp; -INTPTR bamboo_free_smemp; -int bamboo_free_smem_size; -#endif -volatile bool smemflag; -volatile INTPTR bamboo_cur_msp; -volatile int bamboo_smem_size; - + // for test TODO int total_num_t6; // data structures for profile mode #ifdef PROFILE - #define TASKINFOLENGTH 3000 // 0 #ifdef PROFILE_INTERRUPT #define INTERRUPTINFOLENGTH 50 //0 #endif // PROFILE_INTERRUPT -bool stall; -int totalexetime; - typedef struct task_info { char* taskName; unsigned long long startTime; @@ -439,17 +323,16 @@ volatile int profilestatus[NUMCORESACTIVE]; // records status of each core // 1: running tasks // 0: stall #endif // #ifdef PROFILE +#endif // TASK + +#include "multicoremem.h" -#ifndef INTERRUPT -bool reside; -#endif ///////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // these are functions should be implemented in // // multicore runtime for any multicore processors // //////////////////////////////////////////////////////////// -#ifdef TASK #ifdef MULTICORE INLINE void initialization(void); INLINE void initCommunication(void); @@ -458,33 +341,7 @@ INLINE void terminate(void); INLINE void initlock(struct ___Object___ * v); #ifdef BAMBOO_MEMPROF INLINE void terminatememprof(void); -#endif - -// lock related functions -bool getreadlock(void* ptr); -void releasereadlock(void* ptr); -bool getwritelock(void* ptr); -void releasewritelock(void* ptr); -bool getwritelock_I(void* ptr); -void releasewritelock_I(void * ptr); -#ifndef MULTICORE_GC -void releasewritelock_r(void * lock, void * redirectlock); -#endif -/* this function is to process lock requests. 
- * can only be invoked in receiveObject() */ -// if return -1: the lock request is redirected -// 0: the lock request is approved -// 1: the lock request is denied -INLINE int processlockrequest(int locktype, - int lock, - int obj, - int requestcore, - int rootrequestcore, - bool cache); -INLINE void processlockrelease(int locktype, - int lock, - int redirectlock, - bool redirect); +#endif // BAMBOO_MEMPROF // msg related functions INLINE void send_hanging_msg(bool isInterrupt); @@ -548,12 +405,41 @@ INLINE void cache_msg_6(int targetcore, unsigned long n3, unsigned long n4, unsigned long n5); -INLINE void transferObject(struct transObjInfo * transObj); INLINE int receiveMsg(uint32_t send_port_pending); #ifdef MULTICORE_GC INLINE void transferMarkResults(); -#endif +#endif // MULTICORE_GC + +#ifdef TASK +// lock related functions +bool getreadlock(void* ptr); +void releasereadlock(void* ptr); +bool getwritelock(void* ptr); +void releasewritelock(void* ptr); +bool getwritelock_I(void* ptr); +void releasewritelock_I(void * ptr); +#ifndef MULTICORE_GC +void releasewritelock_r(void * lock, void * redirectlock); +#endif // ifndef MULTICORE_GC +/* this function is to process lock requests. 
+ * can only be invoked in receiveObject() */ +// if return -1: the lock request is redirected +// 0: the lock request is approved +// 1: the lock request is denied +INLINE int processlockrequest(int locktype, + int lock, + int obj, + int requestcore, + int rootrequestcore, + bool cache); +INLINE void processlockrelease(int locktype, + int lock, + int redirectlock, + bool redirect); + +// msg related functions +INLINE void transferObject(struct transObjInfo * transObj); #ifdef PROFILE INLINE void profileTaskStart(char * taskname); @@ -624,6 +510,6 @@ void outputProfileData(); // stores to incoherent memory // ///////////////////////////////////////////////////////////////////////////// -#endif // #ifdef MULTICORE #endif // #ifdef TASK +#endif // #ifdef MULTICORE #endif // #ifndef MULTICORE_RUNTIME diff --git a/Robust/src/Runtime/bamboo/multicoretask.c b/Robust/src/Runtime/bamboo/multicoretask.c index 02c0472c..37b30789 100644 --- a/Robust/src/Runtime/bamboo/multicoretask.c +++ b/Robust/src/Runtime/bamboo/multicoretask.c @@ -24,373 +24,20 @@ int enqueuetasks_I(struct parameterwrapper *parameter, int * enterflags, int numenterflags); -#ifdef MULTICORE_GC -#ifdef SMEMF -#define NUM_CORES2TEST 5 -#ifdef GC_1 -int core2test[1][NUM_CORES2TEST] = { - {0, -1, -1, -1, -1} -}; -#elif defined GC_56 -int core2test[56][NUM_CORES2TEST] = { - { 0, -1, 7, -1, 1}, { 1, -1, 8, 0, 2}, { 2, -1, 9, 1, 3}, - { 3, -1, 10, 2, 4}, { 4, -1, 11, 3, 5}, { 5, -1, 12, 4, 6}, - { 6, -1, 13, 5, -1}, { 7, 0, 14, -1, 8}, { 8, 1, 15, 7, 9}, - { 9, 2, 16, 8, 10}, {10, 3, 17, 9, 11}, {11, 4, 18, 10, 12}, - {12, 5, 19, 11, 13}, {13, 6, 20, 12, -1}, {14, 7, 21, -1, 15}, - {15, 8, 22, 14, 16}, {16, 9, 23, 15, 17}, {17, 10, 24, 16, 18}, - {18, 11, 25, 17, 19}, {19, 12, 26, 18, 20}, {20, 13, 27, 19, -1}, - {21, 14, 28, -1, 22}, {22, 15, 29, 21, 23}, {23, 16, 30, 22, 24}, - {24, 17, 31, 23, 25}, {25, 18, 32, 24, 26}, {26, 19, 33, 25, 27}, - {27, 20, 34, 26, -1}, {28, 21, 35, -1, 29}, {29, 22, 36, 28, 30}, - 
{30, 23, 37, 29, 31}, {31, 24, 38, 30, 32}, {32, 25, 39, 31, 33}, - {33, 26, 40, 32, 34}, {34, 27, 41, 33, -1}, {35, 28, 42, -1, 36}, - {36, 29, 43, 35, 37}, {37, 30, 44, 36, 38}, {38, 31, 45, 37, 39}, - {39, 32, 46, 38, 40}, {40, 33, 47, 39, 41}, {41, 34, 48, 40, -1}, - {42, 35, 49, -1, 43}, {43, 36, 50, 42, 44}, {44, 37, 51, 43, 45}, - {45, 38, 52, 44, 46}, {46, 39, 53, 45, 47}, {47, 40, 54, 46, 48}, - {48, 41, 55, 47, -1}, {49, 42, -1, -1, 50}, {50, 43, -1, 49, 51}, - {51, 44, -1, 50, 52}, {52, 45, -1, 51, 53}, {53, 46, -1, 52, 54}, - {54, 47, -1, 53, 55}, {55, 48, -1, 54, -1} -}; -#elif defined GC_62 -int core2test[62][NUM_CORES2TEST] = { - { 0, -1, 6, -1, 1}, { 1, -1, 7, 0, 2}, { 2, -1, 8, 1, 3}, - { 3, -1, 9, 2, 4}, { 4, -1, 10, 3, 5}, { 5, -1, 11, 4, -1}, - { 6, 0, 14, -1, 7}, { 7, 1, 15, 6, 8}, { 8, 2, 16, 7, 9}, - { 9, 3, 17, 8, 10}, {10, 4, 18, 9, 11}, {11, 5, 19, 10, 12}, - {12, -1, 20, 11, 13}, {13, -1, 21, 12, -1}, {14, 6, 22, -1, 15}, - {15, 7, 23, 14, 16}, {16, 8, 24, 15, 17}, {17, 9, 25, 16, 18}, - {18, 10, 26, 17, 19}, {19, 11, 27, 18, 20}, {20, 12, 28, 19, 21}, - {21, 13, 29, 28, -1}, {22, 14, 30, -1, 23}, {23, 15, 31, 22, 24}, - {24, 16, 32, 23, 25}, {25, 17, 33, 24, 26}, {26, 18, 34, 25, 27}, - {27, 19, 35, 26, 28}, {28, 20, 36, 27, 29}, {29, 21, 37, 28, -1}, - {30, 22, 38, -1, 31}, {31, 23, 39, 30, 32}, {32, 24, 40, 31, 33}, - {33, 25, 41, 32, 34}, {34, 26, 42, 33, 35}, {35, 27, 43, 34, 36}, - {36, 28, 44, 35, 37}, {37, 29, 45, 36, -1}, {38, 30, 46, -1, 39}, - {39, 31, 47, 38, 40}, {40, 32, 48, 39, 41}, {41, 33, 49, 40, 42}, - {42, 34, 50, 41, 43}, {43, 35, 51, 42, 44}, {44, 36, 52, 43, 45}, - {45, 37, 53, 44, -1}, {46, 38, 54, -1, 47}, {47, 39, 55, 46, 48}, - {48, 40, 56, 47, 49}, {49, 41, 57, 48, 50}, {50, 42, 58, 49, 51}, - {51, 43, 59, 50, 52}, {52, 44, 60, 51, 53}, {53, 45, 61, 52, -1}, - {54, 46, -1, -1, 55}, {55, 47, -1, 54, 56}, {56, 48, -1, 55, 57}, - {57, 49, -1, 56, 59}, {58, 50, -1, 57, 59}, {59, 51, -1, 58, 60}, - {60, 52, -1, 59, 
61}, {61, 53, -1, 60, -1} -}; -#endif // GC_1 -#elif defined SMEMM -unsigned int gcmem_mixed_threshold = 0; -unsigned int gcmem_mixed_usedmem = 0; -#define NUM_CORES2TEST 9 -#ifdef GC_1 -int core2test[1][NUM_CORES2TEST] = { - {0, -1, -1, -1, -1, -1, -1, -1, -1} -}; -#elif defined GC_56 -int core2test[56][NUM_CORES2TEST] = { - { 0, -1, 7, -1, 1, -1, 14, -1, 2}, - { 1, -1, 8, 0, 2, -1, 15, -1, 3}, - { 2, -1, 9, 1, 3, -1, 16, 0, 4}, - { 3, -1, 10, 2, 4, -1, 17, 1, 5}, - { 4, -1, 11, 3, 5, -1, 18, 2, 6}, - { 5, -1, 12, 4, 6, -1, 19, 3, -1}, - { 6, -1, 13, 5, -1, -1, 20, 4, -1}, - { 7, 0, 14, -1, 8, -1, 21, -1, 9}, - { 8, 1, 15, 7, 9, -1, 22, -1, 10}, - { 9, 2, 16, 8, 10, -1, 23, 7, 11}, - {10, 3, 17, 9, 11, -1, 24, 8, 12}, - {11, 4, 18, 10, 12, -1, 25, 9, 13}, - {12, 5, 19, 11, 13, -1, 26, 10, -1}, - {13, 6, 20, 12, -1, -1, 27, 11, -1}, - {14, 7, 21, -1, 15, 0, 28, -1, 16}, - {15, 8, 22, 14, 16, 1, 29, -1, 17}, - {16, 9, 23, 15, 17, 2, 30, 14, 18}, - {17, 10, 24, 16, 18, 3, 31, 15, 19}, - {18, 11, 25, 17, 19, 4, 32, 16, 20}, - {19, 12, 26, 18, 20, 5, 33, 17, -1}, - {20, 13, 27, 19, -1, 6, 34, 18, -1}, - {21, 14, 28, -1, 22, 7, 35, -1, 23}, - {22, 15, 29, 21, 23, 8, 36, -1, 24}, - {23, 16, 30, 22, 24, 9, 37, 21, 25}, - {24, 17, 31, 23, 25, 10, 38, 22, 26}, - {25, 18, 32, 24, 26, 11, 39, 23, 27}, - {26, 19, 33, 25, 27, 12, 40, 24, -1}, - {27, 20, 34, 26, -1, 13, 41, 25, -1}, - {28, 21, 35, -1, 29, 14, 42, -1, 30}, - {29, 22, 36, 28, 30, 15, 43, -1, 31}, - {30, 23, 37, 29, 31, 16, 44, 28, 32}, - {31, 24, 38, 30, 32, 17, 45, 29, 33}, - {32, 25, 39, 31, 33, 18, 46, 30, 34}, - {33, 26, 40, 32, 34, 19, 47, 31, -1}, - {34, 27, 41, 33, -1, 20, 48, 32, -1}, - {35, 28, 42, -1, 36, 21, 49, -1, 37}, - {36, 29, 43, 35, 37, 22, 50, -1, 38}, - {37, 30, 44, 36, 38, 23, 51, 35, 39}, - {38, 31, 45, 37, 39, 24, 52, 36, 40}, - {39, 32, 46, 38, 40, 25, 53, 37, 41}, - {40, 33, 47, 39, 41, 26, 54, 38, -1}, - {41, 34, 48, 40, -1, 27, 55, 39, -1}, - {42, 35, 49, -1, 43, 28, -1, -1, 44}, - {43, 
36, 50, 42, 44, 29, -1, -1, 45}, - {44, 37, 51, 43, 45, 30, -1, 42, 46}, - {45, 38, 52, 44, 46, 31, -1, 43, 47}, - {46, 39, 53, 45, 47, 32, -1, 44, 48}, - {47, 40, 54, 46, 48, 33, -1, 45, -1}, - {48, 41, 55, 47, -1, 34, -1, 46, -1}, - {49, 42, -1, -1, 50, 35, -1, -1, 51}, - {50, 43, -1, 49, 51, 36, -1, -1, 52}, - {51, 44, -1, 50, 52, 37, -1, 49, 53}, - {52, 45, -1, 51, 53, 38, -1, 50, 54}, - {53, 46, -1, 52, 54, 39, -1, 51, 55}, - {54, 47, -1, 53, 55, 40, -1, 52, -1}, - {55, 48, -1, 54, -1, 41, -1, 53, -1} -}; -#elif defined GC_62 -int core2test[62][NUM_CORES2TEST] = { - { 0, -1, 6, -1, 1, -1, 14, -1, 2}, - { 1, -1, 7, 0, 2, -1, 15, -1, 3}, - { 2, -1, 8, 1, 3, -1, 16, 0, 4}, - { 3, -1, 9, 2, 4, -1, 17, 1, 5}, - { 4, -1, 10, 3, 5, -1, 18, 2, -1}, - { 5, -1, 11, 4, -1, -1, 19, 3, -1}, - { 6, 0, 14, -1, 7, -1, 22, -1, 8}, - { 7, 1, 15, 6, 8, -1, 23, -1, 9}, - { 8, 2, 16, 7, 9, -1, 24, 6, 10}, - { 9, 3, 17, 8, 10, -1, 25, 7, 11}, - {10, 4, 18, 9, 11, -1, 26, 8, 12}, - {11, 5, 19, 10, 12, -1, 27, 9, 13}, - {12, -1, 20, 11, 13, -1, 28, 10, -1}, - {13, -1, 21, 12, -1, -1, 29, 11, -1}, - {14, 6, 22, -1, 15, 0, 30, -1, 16}, - {15, 7, 23, 14, 16, 1, 31, -1, 17}, - {16, 8, 24, 15, 17, 2, 32, 14, 18}, - {17, 9, 25, 16, 18, 3, 33, 15, 19}, - {18, 10, 26, 17, 19, 4, 34, 16, 20}, - {19, 11, 27, 18, 20, 5, 35, 17, 21}, - {20, 12, 28, 19, 21, -1, 36, 18, -1}, - {21, 13, 29, 28, -1, -1, 37, 19, -1}, - {22, 14, 30, -1, 23, 6, 38, -1, 24}, - {23, 15, 31, 22, 24, 7, 39, -1, 25}, - {24, 16, 32, 23, 25, 8, 40, 22, 26}, - {25, 17, 33, 24, 26, 9, 41, 23, 27}, - {26, 18, 34, 25, 27, 10, 42, 24, 28}, - {27, 19, 35, 26, 28, 11, 43, 25, 29}, - {28, 20, 36, 27, 29, 12, 44, 26, -1}, - {29, 21, 37, 28, -1, 13, 45, 27, -1}, - {30, 22, 38, -1, 31, 22, 46, -1, 32}, - {31, 23, 39, 30, 32, 15, 47, -1, 33}, - {32, 24, 40, 31, 33, 16, 48, 30, 34}, - {33, 25, 41, 32, 34, 17, 49, 31, 35}, - {34, 26, 42, 33, 35, 18, 50, 32, 36}, - {35, 27, 43, 34, 36, 19, 51, 33, 37}, - {36, 28, 44, 35, 37, 20, 52, 34, 
-1}, - {37, 29, 45, 36, -1, 21, 53, 35, -1}, - {38, 30, 46, -1, 39, 22, 54, -1, 40}, - {39, 31, 47, 38, 40, 23, 55, -1, 41}, - {40, 32, 48, 39, 41, 24, 56, 38, 42}, - {41, 33, 49, 40, 42, 25, 57, 39, 43}, - {42, 34, 50, 41, 43, 26, 58, 40, 44}, - {43, 35, 51, 42, 44, 27, 59, 41, 45}, - {44, 36, 52, 43, 45, 28, 60, 42, -1}, - {45, 37, 53, 44, -1, 29, 61, 43, -1}, - {46, 38, 54, -1, 47, 30, -1, -1, 48}, - {47, 39, 55, 46, 48, 31, -1, -1, 49}, - {48, 40, 56, 47, 49, 32, -1, 46, 50}, - {49, 41, 57, 48, 50, 33, -1, 47, 51}, - {50, 42, 58, 49, 51, 34, -1, 48, 52}, - {51, 43, 59, 50, 52, 35, -1, 49, 53}, - {52, 44, 60, 51, 53, 36, -1, 50, -1}, - {53, 45, 61, 52, -1, 37, -1, 51, -1}, - {54, 46, -1, -1, 55, 38, -1, -1, 56}, - {55, 47, -1, 54, 56, 39, -1, -1, 57}, - {56, 48, -1, 55, 57, 40, -1, 54, 58}, - {57, 49, -1, 56, 59, 41, -1, 55, 59}, - {58, 50, -1, 57, 59, 42, -1, 56, 60}, - {59, 51, -1, 58, 60, 43, -1, 57, 61}, - {60, 52, -1, 59, 61, 44, -1, 58, -1}, - {61, 53, -1, 60, -1, 45, -1, 59, -1} -}; -#endif // GC_1 -#endif - -inline __attribute__((always_inline)) -void setupsmemmode(void) { -#ifdef SMEML - // Only allocate local mem chunks to each core. - // If a core has used up its local shared memory, start gc. - bamboo_smem_mode = SMEMLOCAL; -#elif defined SMEMF - // Allocate the local shared memory to each core with the highest priority, - // if a core has used up its local shared memory, try to allocate the - // shared memory that belong to its neighbours, if also failed, start gc. - bamboo_smem_mode = SMEMFIXED; -#elif defined SMEMM - // Allocate the local shared memory to each core with the highest priority, - // if a core has used up its local shared memory, try to allocate the - // shared memory that belong to its neighbours first, if failed, check - // current memory allocation rate, if it has already reached the threshold, - // start gc, otherwise, allocate the shared memory globally. If all the - // shared memory has been used up, start gc. 
- bamboo_smem_mode = SMEMMIXED; -#elif defined SMEMG - // Allocate all the memory chunks globally, do not consider the host cores - // When all the shared memory are used up, start gc. - bamboo_smem_mode = SMEMGLOBAL; -#else - // defaultly using local mode - bamboo_smem_mode = SMEMLOCAL; -#endif -} // void setupsmemmode(void) -#endif - -inline __attribute__((always_inline)) -void initruntimedata() { - int i; - // initialize the arrays +INLINE void inittaskdata() { + int i = 0; + if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { // startup core to initialize corestatus[] for(i = 0; i < NUMCORESACTIVE; ++i) { - corestatus[i] = 1; - numsendobjs[i] = 0; - numreceiveobjs[i] = 0; #ifdef PROFILE // initialize the profile data arrays profilestatus[i] = 1; -#endif -#ifdef MULTICORE_GC - gccorestatus[i] = 1; - gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0; - gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0; -#endif +#endif // PROFILE } // for(i = 0; i < NUMCORESACTIVE; ++i) -#ifdef MULTICORE_GC - for(i = 0; i < NUMCORES4GC; ++i) { - gcloads[i] = 0; - gcrequiredmems[i] = 0; - gcstopblock[i] = 0; - gcfilledblocks[i] = 0; - } // for(i = 0; i < NUMCORES4GC; ++i) -#ifdef GC_PROFILE - gc_infoIndex = 0; - gc_infoOverflow = false; - gc_num_livespace = 0; - gc_num_freespace = 0; -#endif -#endif - numconfirm = 0; - waitconfirm = false; - - // TODO for test - total_num_t6 = 0; - } - - busystatus = true; - self_numsendobjs = 0; - self_numreceiveobjs = 0; - - for(i = 0; i < BAMBOO_MSG_BUF_LENGTH; ++i) { - msgdata[i] = -1; + total_num_t6 = 0; // TODO for test } - msgdataindex = 0; - msgdatalast = 0; - msglength = BAMBOO_MSG_BUF_LENGTH; - msgdatafull = false; - for(i = 0; i < BAMBOO_OUT_BUF_LENGTH; ++i) { - outmsgdata[i] = -1; - } - outmsgindex = 0; - outmsglast = 0; - outmsgleft = 0; - isMsgHanging = false; - - smemflag = true; - bamboo_cur_msp = NULL; - bamboo_smem_size = 0; totransobjqueue = createQueue_I(); - -#ifdef MULTICORE_GC - bamboo_smem_zero_top = NULL; - gcflag = false; - gcprocessing 
= false; - gcphase = FINISHPHASE; - gcprecheck = true; - gccurr_heaptop = 0; - gcself_numsendobjs = 0; - gcself_numreceiveobjs = 0; - gcmarkedptrbound = 0; -#ifdef LOCALHASHTBL_TEST - gcpointertbl = allocateRuntimeHash_I(20); -#else - gcpointertbl = mgchashCreate_I(2000, 0.75); -#endif - gcforwardobjtbl = allocateMGCHash_I(20, 3); - gcobj2map = 0; - gcmappedobj = 0; - gcnumlobjs = 0; - gcheaptop = 0; - gctopcore = 0; - gctopblock = 0; - gcmovestartaddr = 0; - gctomove = false; - gcmovepending = 0; - gcblock2fill = 0; - gcsbstarttbl = BAMBOO_BASE_VA; - bamboo_smemtbl = (void *)gcsbstarttbl - + (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE)*sizeof(INTPTR); - if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) { - int t_size = ((BAMBOO_RMSP_SIZE)-sizeof(mgcsharedhashtbl_t)*2 - -128*sizeof(size_t))/sizeof(mgcsharedhashlistnode_t)-2; - int kk = 0; - unsigned int tmp_k = 1 << (sizeof(int)*8 -1); - while(((t_size & tmp_k) == 0) && (kk < sizeof(int)*8)) { - t_size = t_size << 1; - kk++; - } - t_size = tmp_k >> kk; - gcsharedptbl = mgcsharedhashCreate_I(t_size,0.30); - } else { - gcsharedptbl = NULL; - } - BAMBOO_MEMSET_WH(gcrpointertbls, 0, - sizeof(mgcsharedhashtbl_t *)*NUMCORES4GC); -#ifdef SMEMM - gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE - -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8); - gcmem_mixed_usedmem = 0; -#endif -#ifdef GC_PROFILE - gc_num_obj = 0; - gc_num_liveobj = 0; - gc_num_forwardobj = 0; - gc_num_profiles = NUMCORESACTIVE - 1; -#endif -#ifdef GC_FLUSH_DTLB - gc_num_flush_dtlb = 0; -#endif - gc_localheap_s = false; -#ifdef GC_CACHE_ADAPT - gccachestage = false; -#endif // GC_CACHE_ADAPT -#else - // create the lock table, lockresult table and obj queue - locktable.size = 20; - locktable.bucket = - (struct RuntimeNode **) RUNMALLOC_I(sizeof(struct RuntimeNode *)*20); - /* Set allocation blocks*/ - locktable.listhead=NULL; - locktable.listtail=NULL; - /*Set data counts*/ - locktable.numelements = 0; - lockobj = 0; - lock2require = 0; - lockresult = 0; - 
lockflag = false; - lockRedirectTbl = allocateRuntimeHash_I(20); - objRedirectLockTbl = allocateRuntimeHash_I(20); -#endif -#ifndef INTERRUPT - reside = false; -#endif objqueue.head = NULL; objqueue.tail = NULL; @@ -412,22 +59,27 @@ void initruntimedata() { runtime_locks[i].value = 0; } runtime_locklen = 0; -} -inline __attribute__((always_inline)) -void disruntimedata() { -#ifdef MULTICORE_GC -#ifdef LOCALHASHTBL_TEST - freeRuntimeHash(gcpointertbl); -#else - mgchashDelete(gcpointertbl); -#endif - freeMGCHash(gcforwardobjtbl); -#else - freeRuntimeHash(lockRedirectTbl); - freeRuntimeHash(objRedirectLockTbl); - RUNFREE(locktable.bucket); +#ifndef MULTICORE_GC + // create the lock table, lockresult table and obj queue + locktable.size = 20; + locktable.bucket = + (struct RuntimeNode **) RUNMALLOC_I(sizeof(struct RuntimeNode *)*20); + /* Set allocation blocks*/ + locktable.listhead=NULL; + locktable.listtail=NULL; + /*Set data counts*/ + locktable.numelements = 0; + lockobj = 0; + lock2require = 0; + lockresult = 0; + lockflag = false; + lockRedirectTbl = allocateRuntimeHash_I(20); + objRedirectLockTbl = allocateRuntimeHash_I(20); #endif +} + +INLINE void distaskdata() { if(activetasks != NULL) { genfreehashtable(activetasks); } @@ -436,12 +88,14 @@ void disruntimedata() { RUNFREE(currtpd); currtpd = NULL; } - BAMBOO_LOCAL_MEM_CLOSE(); - BAMBOO_SHARE_MEM_CLOSE(); +#ifndef MULTICORE_GC + freeRuntimeHash(lockRedirectTbl); + freeRuntimeHash(objRedirectLockTbl); + RUNFREE(locktable.bucket); +#endif } -inline __attribute__((always_inline)) -bool checkObjQueue() { +INLINE bool checkObjQueue() { bool rflag = false; struct transObjInfo * objInfo = NULL; int grount = 0; @@ -550,273 +204,6 @@ objqueuebreak: return rflag; } -inline __attribute__((always_inline)) -void checkCoreStatus() { - bool allStall = false; - int i = 0; - int sumsendobj = 0; - if((!waitconfirm) || - (waitconfirm && (numconfirm == 0))) { - BAMBOO_DEBUGPRINT(0xee04); - BAMBOO_DEBUGPRINT_REG(waitconfirm); - 
BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); - BAMBOO_DEBUGPRINT(0xf001); - corestatus[BAMBOO_NUM_OF_CORE] = 0; - numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs; - numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs; - // check the status of all cores - allStall = true; - BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE); - for(i = 0; i < NUMCORESACTIVE; ++i) { - BAMBOO_DEBUGPRINT(0xe000 + corestatus[i]); - if(corestatus[i] != 0) { - allStall = false; - break; - } - } // for(i = 0; i < NUMCORESACTIVE; ++i) - if(allStall) { - // check if the sum of send objs and receive obj are the same - // yes->check if the info is the latest; no->go on executing - sumsendobj = 0; - for(i = 0; i < NUMCORESACTIVE; ++i) { - sumsendobj += numsendobjs[i]; - BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]); - } // for(i = 0; i < NUMCORESACTIVE; ++i) - for(i = 0; i < NUMCORESACTIVE; ++i) { - sumsendobj -= numreceiveobjs[i]; - BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]); - } // for(i = 0; i < NUMCORESACTIVE; ++i) - if(0 == sumsendobj) { - if(!waitconfirm) { - // the first time found all cores stall - // send out status confirm msg to all other cores - // reset the corestatus array too - BAMBOO_DEBUGPRINT(0xee05); - corestatus[BAMBOO_NUM_OF_CORE] = 1; - waitconfirm = true; - numconfirm = NUMCORESACTIVE - 1; - BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); - for(i = 1; i < NUMCORESACTIVE; ++i) { - corestatus[i] = 1; - // send status confirm msg to core i - send_msg_1(i, STATUSCONFIRM, false); - } // for(i = 1; i < NUMCORESACTIVE; ++i) - return; - } else { - // all the core status info are the latest - // terminate; for profiling mode, send request to all - // other cores to pour out profiling data - BAMBOO_DEBUGPRINT(0xee06); - -#ifdef USEIO - totalexetime = BAMBOO_GET_EXE_TIME() - bamboo_start_time; -#else - - BAMBOO_PRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time); - //BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test -#ifdef GC_FLUSH_DTLB - BAMBOO_PRINT_REG(gc_num_flush_dtlb); -#endif -#ifndef 
BAMBOO_MEMPROF - BAMBOO_PRINT(0xbbbbbbbb); -#endif -#endif - // profile mode, send msgs to other cores to request pouring - // out progiling data -#ifdef PROFILE - BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); - BAMBOO_DEBUGPRINT(0xf000); - for(i = 1; i < NUMCORESACTIVE; ++i) { - // send profile request msg to core i - send_msg_2(i, PROFILEOUTPUT, totalexetime, false); - } // for(i = 1; i < NUMCORESACTIVE; ++i) -#ifndef RT_TEST - // pour profiling data on startup core - outputProfileData(); -#endif - while(true) { - BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); - BAMBOO_DEBUGPRINT(0xf001); - profilestatus[BAMBOO_NUM_OF_CORE] = 0; - // check the status of all cores - allStall = true; - BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE); - for(i = 0; i < NUMCORESACTIVE; ++i) { - BAMBOO_DEBUGPRINT(0xe000 + profilestatus[i]); - if(profilestatus[i] != 0) { - allStall = false; - break; - } - } // for(i = 0; i < NUMCORESACTIVE; ++i) - if(!allStall) { - int halt = 100; - BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); - BAMBOO_DEBUGPRINT(0xf000); - while(halt--) { - } - } else { - BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); - break; - } // if(!allStall) - } // while(true) -#endif - - // gc_profile mode, output gc prfiling data -#ifdef MULTICORE_GC -#ifdef GC_CACHE_ADAPT - bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt -#endif // GC_CACHE_ADAPT -#ifdef GC_PROFILE - gc_outputProfileData(); -#endif // #ifdef GC_PROFILE -#endif // #ifdef MULTICORE_GC - disruntimedata(); - BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); - terminate(); // All done. 
- } // if(!waitconfirm) - } else { - // still some objects on the fly on the network - // reset the waitconfirm and numconfirm - BAMBOO_DEBUGPRINT(0xee07); - waitconfirm = false; - numconfirm = 0; - } // if(0 == sumsendobj) - } else { - // not all cores are stall, keep on waiting - BAMBOO_DEBUGPRINT(0xee08); - waitconfirm = false; - numconfirm = 0; - } // if(allStall) - BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); - BAMBOO_DEBUGPRINT(0xf000); - } // if((!waitconfirm) || -} - -// main function for each core -inline void run(void * arg) { - int i = 0; - int argc = 1; - char ** argv = NULL; - bool sendStall = false; - bool isfirst = true; - bool tocontinue = false; - - corenum = BAMBOO_GET_NUM_OF_CORE(); - BAMBOO_DEBUGPRINT(0xeeee); - BAMBOO_DEBUGPRINT_REG(corenum); - BAMBOO_DEBUGPRINT(STARTUPCORE); - - // initialize runtime data structures - initruntimedata(); - - // other architecture related initialization - initialization(); - initCommunication(); - -#ifdef GC_CACHE_ADAPT -// enable the timer interrupt -#ifdef GC_CACHE_SAMPLING - bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); // TODO - bamboo_unmask_timer_intr(); - bamboo_dtlb_sampling_process(); -#endif // GC_CACHE_SAMPLING -#endif // GC_CACHE_ADAPT - - initializeexithandler(); - - // main process of the execution module - if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) { - // non-executing cores, only processing communications - activetasks = NULL; - fakeExecution(); - } else { - /* Create queue of active tasks */ - activetasks= - genallocatehashtable((unsigned int (*)(void *)) &hashCodetpd, - (int (*)(void *,void *)) &comparetpd); - - /* Process task information */ - processtasks(); - - if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { - /* Create startup object */ - createstartupobject(argc, argv); - } - - BAMBOO_DEBUGPRINT(0xee00); - - while(true) { - -#ifdef MULTICORE_GC - // check if need to do GC - if(gcflag) { - gc(NULL); - } -#endif // MULTICORE_GC - - // check if there are new active tasks can be executed 
- executetasks(); - if(busystatus) { - sendStall = false; - } - -#ifndef INTERRUPT - while(receiveObject() != -1) { - } -#endif - - BAMBOO_DEBUGPRINT(0xee01); - - // check if there are some pending objects, - // if yes, enqueue them and executetasks again - tocontinue = checkObjQueue(); - - if(!tocontinue) { - // check if stop - if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { - if(isfirst) { - BAMBOO_DEBUGPRINT(0xee03); - isfirst = false; - } - checkCoreStatus(); - } else { - if(!sendStall) { - BAMBOO_DEBUGPRINT(0xee09); -#ifdef PROFILE - if(!stall) { -#endif - if(isfirst) { - // wait for some time - int halt = 10000; - BAMBOO_DEBUGPRINT(0xee0a); - while(halt--) { - } - isfirst = false; - } else { - // send StallMsg to startup core - BAMBOO_DEBUGPRINT(0xee0b); - // send stall msg - send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs, false); - sendStall = true; - isfirst = true; - busystatus = false; - } -#ifdef PROFILE - } -#endif - } else { - isfirst = true; - busystatus = false; - BAMBOO_DEBUGPRINT(0xee0c); - } // if(!sendStall) - } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) - } // if(!tocontinue) - } // while(true) - } // if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) - -} // run() - struct ___createstartupobject____I_locals { INTPTR size; void * next; @@ -1450,692 +837,73 @@ inline void addNewObjInfo(void * nobj) { } #endif -#ifdef MULTICORE_GC -// Only allocate local mem chunks to each core. -// If a core has used up its local shared memory, start gc. -void * localmalloc_I(int coren, - int isize, - int * allocsize) { - void * mem = NULL; - int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC); - int i = 0; - int j = 0; - int tofindb = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j; - int totest = tofindb; - int bound = BAMBOO_SMEM_SIZE_L; - int foundsmem = 0; - int size = 0; - do { - bound = (totest < NUMCORES4GC) ? 
BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; - int nsize = bamboo_smemtbl[totest]; - bool islocal = true; - if(nsize < bound) { - bool tocheck = true; - // have some space in the block - if(totest == tofindb) { - // the first partition - size = bound - nsize; - } else if(nsize == 0) { - // an empty partition, can be appended - size += bound; - } else { - // not an empty partition, can not be appended - // the last continuous block is not big enough, go to check the next - // local block - islocal = true; - tocheck = false; - } // if(totest == tofindb) else if(nsize == 0) else ... - if(tocheck) { - if(size >= isize) { - // have enough space in the block, malloc - foundsmem = 1; - break; - } else { - // no enough space yet, try to append next continuous block - islocal = false; - } // if(size > isize) else ... - } // if(tocheck) - } // if(nsize < bound) - if(islocal) { - // no space in the block, go to check the next block - i++; - if(2==i) { - i = 0; - j++; - } - tofindb = totest = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j; - } else { - totest += 1; - } // if(islocal) else ... - if(totest > gcnumblock-1-bamboo_reserved_smem) { - // no more local mem, do not find suitable block - foundsmem = 2; - break; - } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... - } while(true); - - if(foundsmem == 1) { - // find suitable block - mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb= isize) { - // have enough space in the block, malloc - foundsmem = 1; - break; - } else { - // no enough space yet, try to append next continuous block - // TODO may consider to go to next local block? - islocal = false; - } // if(size > isize) else ... - } // if(tocheck) - } // if(nsize < bound) - if(islocal) { - // no space in the block, go to check the next block - i++; - if(2==i) { - i = 0; - j++; - } - tofindb=totest= - gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; - } else { - totest += 1; - } // if(islocal) else ... 
- if(totest > gcnumblock-1-bamboo_reserved_smem) { - // no more local mem, do not find suitable block on local mem - // try to malloc shared memory assigned to the neighbour cores - do{ - k++; - if(k >= NUM_CORES2TEST) { - // no more memory available on either coren or its neighbour cores - foundsmem = 2; - goto memsearchresult; - } - } while(core2test[gccorenum][k] == -1); - i = 0; - j = 0; - tofindb=totest= - gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; - } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... - } while(true); - -memsearchresult: - if(foundsmem == 1) { - // find suitable block - mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb NUMCORESACTIVE - 1) { +#ifndef CLOSE_PRINT + BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[2]*/); +#endif + BAMBOO_EXIT(0xe005); } - - return mem; -} // void * fixedmalloc_I(int, int, int *) -#endif // #ifdef SMEMF - -#ifdef SMEMM -// Allocate the local shared memory to each core with the highest priority, -// if a core has used up its local shared memory, try to allocate the -// shared memory that belong to its neighbours first, if failed, check -// current memory allocation rate, if it has already reached the threshold, -// start gc, otherwise, allocate the shared memory globally. If all the -// shared memory has been used up, start gc. -void * mixedmalloc_I(int coren, - int isize, - int * allocsize) { - void * mem = NULL; - int i = 0; - int j = 0; - int k = 0; - int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC); - int ii = 1; - int tofindb = gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; - int totest = tofindb; - int bound = BAMBOO_SMEM_SIZE_L; - int foundsmem = 0; - int size = 0; - do { - bound = (totest < NUMCORES4GC) ? 
BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; - int nsize = bamboo_smemtbl[totest]; - bool islocal = true; - if(nsize < bound) { - bool tocheck = true; - // have some space in the block - if(totest == tofindb) { - // the first partition - size = bound - nsize; - } else if(nsize == 0) { - // an empty partition, can be appended - size += bound; + // store the object and its corresponding queue info, enqueue it later + transObj->objptr = (void *)msgdata[msgdataindex]; //[2] + MSG_INDEXINC_I(); + transObj->length = (msglength - 3) / 2; + transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3)); + for(k = 0; k < transObj->length; ++k) { + transObj->queues[2*k] = msgdata[msgdataindex]; //[3+2*k]; + MSG_INDEXINC_I(); + transObj->queues[2*k+1] = msgdata[msgdataindex]; //[3+2*k+1]; + MSG_INDEXINC_I(); + } + // check if there is an existing duplicate item + { + struct QueueItem * qitem = getHead(&objqueue); + struct QueueItem * prev = NULL; + while(qitem != NULL) { + struct transObjInfo * tmpinfo = + (struct transObjInfo *)(qitem->objectptr); + if(tmpinfo->objptr == transObj->objptr) { + // the same object, remove outdate one + RUNFREE(tmpinfo->queues); + RUNFREE(tmpinfo); + removeItem(&objqueue, qitem); + //break; } else { - // not an empty partition, can not be appended - // the last continuous block is not big enough, go to check the next - // local block - islocal = true; - tocheck = false; - } // if(totest == tofindb) else if(nsize == 0) else ... - if(tocheck) { - if(size >= isize) { - // have enough space in the block, malloc - foundsmem = 1; - break; - } else { - // no enough space yet, try to append next continuous block - // TODO may consider to go to next local block? - islocal = false; - } // if(size > isize) else ... 
- } // if(tocheck) - } // if(nsize < bound) - if(islocal) { - // no space in the block, go to check the next block - i++; - if(2==i) { - i = 0; - j++; + prev = qitem; } - tofindb=totest= - gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; - } else { - totest += 1; - } // if(islocal) else ... - if(totest > gcnumblock-1-bamboo_reserved_smem) { - // no more local mem, do not find suitable block on local mem - // try to malloc shared memory assigned to the neighbour cores - do{ - k++; - if(k >= NUM_CORES2TEST) { - if(gcmem_mixed_usedmem >= gcmem_mixed_threshold) { - // no more memory available on either coren or its neighbour cores - foundsmem = 2; - goto memmixedsearchresult; - } else { - // try allocate globally - mem = globalmalloc_I(coren, isize, allocsize); - return mem; - } - } - } while(core2test[gccorenum][k] == -1); - i = 0; - j = 0; - tofindb=totest= - gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; - } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... - } while(true); - -memmixedsearchresult: - if(foundsmem == 1) { - // find suitable block - mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb gcnumblock-1-bamboo_reserved_smem) { - // Out of shared memory - *allocsize = 0; - return NULL; - } - do { - bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; - int nsize = bamboo_smemtbl[totest]; - bool isnext = false; - if(nsize < bound) { - bool tocheck = true; - // have some space in the block - if(totest == tofindb) { - // the first partition - size = bound - nsize; - } else if(nsize == 0) { - // an empty partition, can be appended - size += bound; + if(prev == NULL) { + qitem = getHead(&objqueue); } else { - // not an empty partition, can not be appended - // the last continuous block is not big enough, start another block - isnext = true; - tocheck = false; - } // if(totest == tofindb) else if(nsize == 0) else ... 
- if(tocheck) { - if(size >= isize) { - // have enough space in the block, malloc - foundsmem = 1; - break; - } // if(size > isize) - } // if(tocheck) - } else { - isnext = true; - } // if(nsize < bound) else ... - totest += 1; - if(totest > gcnumblock-1-bamboo_reserved_smem) { - // no more local mem, do not find suitable block - foundsmem = 2; - break; - } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... - if(isnext) { - // start another block - tofindb = totest; - } // if(islocal) - } while(true); - - if(foundsmem == 1) { - // find suitable block - mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb(BAMBOO_SMEM_SIZE)) ? (size) : (BAMBOO_SMEM_SIZE); - if(toallocate > bamboo_free_smem_size) { - // no enough mem - mem = NULL; - } else { - mem = (void *)bamboo_free_smemp; - bamboo_free_smemp = ((void*)bamboo_free_smemp) + toallocate; - bamboo_free_smem_size -= toallocate; + addNewItem_I(&objqueue, (void *)transObj); } - *allocsize = toallocate; - if(mem == NULL) { -#endif // MULTICORE_GC - // no enough shared global memory - *allocsize = 0; + ++(self_numreceiveobjs); #ifdef MULTICORE_GC - if(!gcflag) { - gcflag = true; - // inform other cores to stop and wait for gc + if(gcprocessing) { + if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { + // set the gcprecheck to enable checking again gcprecheck = true; - for(int i = 0; i < NUMCORESACTIVE; i++) { - // reuse the gcnumsendobjs & gcnumreceiveobjs - gccorestatus[i] = 1; - gcnumsendobjs[0][i] = 0; - gcnumreceiveobjs[0][i] = 0; - } - for(int i = 0; i < NUMCORESACTIVE; i++) { - if(i != BAMBOO_NUM_OF_CORE) { - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_1(i, GCSTARTPRE); - } else { - send_msg_1(i, GCSTARTPRE, true); - } - } + } else { + // send a update pregc information msg to the master core + if(BAMBOO_CHECK_SEND_MODE()) { + cache_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, + self_numsendobjs, self_numreceiveobjs); + } else { + send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, + self_numsendobjs, 
self_numreceiveobjs, true); } } - return NULL; -#else - BAMBOO_DEBUGPRINT(0xe003); - BAMBOO_EXIT(0xe003); -#endif } - return mem; -} // void * smemalloc_I(int, int, int) - -INLINE int checkMsgLength_I(int size) { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xcccc); -#endif - int type = msgdata[msgdataindex]; - switch(type) { - case STATUSCONFIRM: - case TERMINATE: -#ifdef MULTICORE_GC - case GCSTARTPRE: - case GCSTARTINIT: - case GCSTART: - case GCSTARTMAPINFO: - case GCSTARTFLUSH: - case GCFINISH: - case GCMARKCONFIRM: - case GCLOBJREQUEST: -#ifdef GC_CACHE_ADAPT - case GCSTARTPREF: -#endif // GC_CACHE_ADAPT -#endif // MULTICORE_GC - { - msglength = 1; - break; - } - - case PROFILEOUTPUT: - case PROFILEFINISH: -#ifdef MULTICORE_GC - case GCSTARTCOMPACT: - case GCMARKEDOBJ: - case GCFINISHINIT: - case GCFINISHMAPINFO: - case GCFINISHFLUSH: -#ifdef GC_CACHE_ADAPT - case GCFINISHPREF: -#endif // GC_CACHE_ADAPT -#endif // MULTICORE_GC - { - msglength = 2; - break; - } - - case MEMREQUEST: - case MEMRESPONSE: -#ifdef MULTICORE_GC - case GCMAPREQUEST: - case GCMAPINFO: - case GCMAPTBL: - case GCLOBJMAPPING: -#endif - { - msglength = 3; - break; - } - - case TRANSTALL: - case LOCKGROUNT: - case LOCKDENY: - case LOCKRELEASE: - case REDIRECTGROUNT: - case REDIRECTDENY: - case REDIRECTRELEASE: -#ifdef MULTICORE_GC - case GCFINISHPRE: - case GCFINISHMARK: - case GCMOVESTART: -#ifdef GC_PROFILE - case GCPROFILES: -#endif -#endif - { - msglength = 4; - break; - } - - case LOCKREQUEST: - case STATUSREPORT: -#ifdef MULTICORE_GC - case GCFINISHCOMPACT: - case GCMARKREPORT: -#endif - { - msglength = 5; - break; - } - - case REDIRECTLOCK: - { - msglength = 6; - break; - } - - case TRANSOBJ: // nonfixed size -#ifdef MULTICORE_GC - case GCLOBJINFO: -#endif - { // nonfixed size - if(size > 1) { - msglength = msgdata[(msgdataindex+1)&(BAMBOO_MSG_BUF_MASK)]; - } else { - return -1; - } - break; - } - - default: - { - BAMBOO_DEBUGPRINT_REG(type); - BAMBOO_DEBUGPRINT_REG(size); - 
BAMBOO_DEBUGPRINT_REG(msgdataindex); - BAMBOO_DEBUGPRINT_REG(msgdatalast); - BAMBOO_DEBUGPRINT_REG(msgdatafull); - int i = 6; - while(i-- > 0) { - BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]); - } - BAMBOO_EXIT(0xe004); - break; - } - } -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]); - BAMBOO_DEBUGPRINT(0xffff); -#endif - return msglength; -} - -INLINE void processmsg_transobj_I() { - MSG_INDEXINC_I(); - struct transObjInfo * transObj=RUNMALLOC_I(sizeof(struct transObjInfo)); - int k = 0; -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe880); -#endif - if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[2]*/); -#endif - BAMBOO_EXIT(0xe005); - } - // store the object and its corresponding queue info, enqueue it later - transObj->objptr = (void *)msgdata[msgdataindex]; //[2] - MSG_INDEXINC_I(); - transObj->length = (msglength - 3) / 2; - transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3)); - for(k = 0; k < transObj->length; ++k) { - transObj->queues[2*k] = msgdata[msgdataindex]; //[3+2*k]; - MSG_INDEXINC_I(); - transObj->queues[2*k+1] = msgdata[msgdataindex]; //[3+2*k+1]; - MSG_INDEXINC_I(); - } - // check if there is an existing duplicate item - { - struct QueueItem * qitem = getHead(&objqueue); - struct QueueItem * prev = NULL; - while(qitem != NULL) { - struct transObjInfo * tmpinfo = - (struct transObjInfo *)(qitem->objectptr); - if(tmpinfo->objptr == transObj->objptr) { - // the same object, remove outdate one - RUNFREE(tmpinfo->queues); - RUNFREE(tmpinfo); - removeItem(&objqueue, qitem); - //break; - } else { - prev = qitem; - } - if(prev == NULL) { - qitem = getHead(&objqueue); - } else { - qitem = getNextQueueItem(prev); - } - } - addNewItem_I(&objqueue, (void *)transObj); - } - ++(self_numreceiveobjs); -#ifdef MULTICORE_GC - if(gcprocessing) { - if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { - // set the gcprecheck to enable checking again - gcprecheck = true; - } else { - // send 
a update pregc information msg to the master core - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs); - } else { - send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs, true); - } - } - } -#endif -} - -INLINE void processmsg_transtall_I() { - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive stall msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/); -#endif - BAMBOO_EXIT(0xe006); - } - int num_core = msgdata[msgdataindex]; //[1] - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; //[2]; - MSG_INDEXINC_I(); - int data3 = msgdata[msgdataindex]; //[3]; - MSG_INDEXINC_I(); - if(num_core < NUMCORESACTIVE) { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe881); -#endif - corestatus[num_core] = 0; - numsendobjs[num_core] = data2; //[2]; - numreceiveobjs[num_core] = data3; //[3]; - } -} +#endif +} #ifndef MULTICORE_GC INLINE void processmsg_lockrequest_I() { @@ -2380,924 +1148,6 @@ INLINE void processmsg_profilefinish_I() { } #endif // #ifdef PROFILE -INLINE void processmsg_statusconfirm_I() { - if((BAMBOO_NUM_OF_CORE == STARTUPCORE) - || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) { - // wrong core to receive such msg - BAMBOO_EXIT(0xe011); - } else { - // send response msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe887); -#endif - // cache the msg first - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_5(STARTUPCORE, STATUSREPORT, - busystatus ? 
1 : 0, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs); - } else { - send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, - BAMBOO_NUM_OF_CORE, self_numsendobjs, - self_numreceiveobjs, true); - } - } -} - -INLINE void processmsg_statusreport_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data3 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data4 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // receive a status confirm info - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // wrong core to receive such msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data2); -#endif - BAMBOO_EXIT(0xe012); - } else { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe888); -#endif - if(waitconfirm) { - numconfirm--; - } - corestatus[data2] = data1; - numsendobjs[data2] = data3; - numreceiveobjs[data2] = data4; - } -} - -INLINE void processmsg_terminate_I() { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe889); -#endif - disruntimedata(); -#ifdef MULTICORE_GC -#ifdef GC_CACHE_ADAPT - bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt -#endif // GC_CACHE_ADAPT -#endif // MULTICORE_GC - BAMBOO_EXIT_APP(0); -} - -INLINE void processmsg_memrequest_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // receive a shared memory request msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // wrong core to receive such msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data2); -#endif - BAMBOO_EXIT(0xe013); - } else { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe88a); -#endif - int allocsize = 0; - void * mem = NULL; -#ifdef MULTICORE_GC - if(gcprocessing) { - // is currently doing gc, dump this msg - if(INITPHASE == gcphase) { - // if still in the initphase of gc, send a startinit msg again, - // cache the msg first - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_1(data2, GCSTARTINIT); - } else { - send_msg_1(data2, GCSTARTINIT, true); - } - } 
- } else { -#endif - mem = smemalloc_I(data2, data1, &allocsize); - if(mem != NULL) { - // send the start_va to request core, cache the msg first - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_3(data2, MEMRESPONSE, mem, allocsize); - } else { - send_msg_3(data2, MEMRESPONSE, mem, allocsize, true); - } - } //else - // if mem == NULL, the gcflag of the startup core has been set - // and all the other cores have been informed to start gc -#ifdef MULTICORE_GC - } -#endif - } -} - -INLINE void processmsg_memresponse_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // receive a shared memory response msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe88b); -#endif -#ifdef MULTICORE_GC - // if is currently doing gc, dump this msg - if(!gcprocessing) { -#endif - if(data2 == 0) { - bamboo_smem_size = 0; - bamboo_cur_msp = 0; -#ifdef MULTICORE_GC - bamboo_smem_zero_top = 0; -#endif - } else { -#ifdef MULTICORE_GC - // fill header to store the size of this mem block - BAMBOO_MEMSET_WH(data1, '\0', BAMBOO_CACHE_LINE_SIZE); - (*((int*)data1)) = data2; - bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE; - bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE; - bamboo_smem_zero_top = bamboo_cur_msp; -#else - bamboo_smem_size = data2; - bamboo_cur_msp =(void*)(data1); -#endif - } - smemflag = true; -#ifdef MULTICORE_GC -} -#endif -} - -#ifdef MULTICORE_GC -INLINE void processmsg_gcstartpre_I() { - if(gcprocessing) { - // already stall for gc - // send a update pregc information msg to the master core - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs); - } else { - send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs, true); - } - } else { - // the first time to be informed to start gc - gcflag = true; - if(!smemflag) { - // is waiting for response of mem request - // let it return NULL and 
start gc - bamboo_smem_size = 0; - bamboo_cur_msp = NULL; - smemflag = true; - bamboo_smem_zero_top = NULL; - } - } -} - -INLINE void processmsg_gcstartinit_I() { - gcphase = INITPHASE; -} - -INLINE void processmsg_gcstart_I() { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe88c); -#endif - // set the GC flag - gcphase = MARKPHASE; -} - -INLINE void processmsg_gcstartcompact_I() { - gcblock2fill = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[1]; - gcphase = COMPACTPHASE; -} - -INLINE void processmsg_gcstartmapinfo_I() { - gcphase = MAPPHASE; -} - -INLINE void processmsg_gcstartflush_I() { - gcphase = FLUSHPHASE; -} - -INLINE void processmsg_gcfinishpre_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data3 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a init phase finish msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data1); -#endif - BAMBOO_EXIT(0xe014); - } - // All cores should do init GC - if(!gcprecheck) { - gcprecheck = true; - } - gccorestatus[data1] = 0; - gcnumsendobjs[0][data1] = data2; - gcnumreceiveobjs[0][data1] = data3; -} - -INLINE void processmsg_gcfinishinit_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a init phase finish msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data1); -#endif - BAMBOO_EXIT(0xe015); - } -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe88c); - BAMBOO_DEBUGPRINT_REG(data1); -#endif - // All cores should do init GC - if(data1 < NUMCORESACTIVE) { - gccorestatus[data1] = 0; - } -} - -INLINE void processmsg_gcfinishmark_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data3 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a mark phase finish msg - 
if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data1); -#endif - BAMBOO_EXIT(0xe016); - } - // all cores should do mark - if(data1 < NUMCORESACTIVE) { - gccorestatus[data1] = 0; - int entry_index = 0; - if(waitconfirm) { - // phase 2 - entry_index = (gcnumsrobjs_index == 0) ? 1 : 0; - } else { - // phase 1 - entry_index = gcnumsrobjs_index; - } - gcnumsendobjs[entry_index][data1] = data2; - gcnumreceiveobjs[entry_index][data1] = data3; - } -} - -INLINE void processmsg_gcfinishcompact_I() { - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg - // return -1 -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/); -#endif - BAMBOO_EXIT(0xe017); - } - int cnum = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[1]; - int filledblocks = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[2]; - int heaptop = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[3]; - int data4 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[4]; - // only gc cores need to do compact - if(cnum < NUMCORES4GC) { - if(COMPACTPHASE == gcphase) { - gcfilledblocks[cnum] = filledblocks; - gcloads[cnum] = heaptop; - } - if(data4 > 0) { - // ask for more mem - int startaddr = 0; - int tomove = 0; - int dstcore = 0; - if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) { - // cache the msg first - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove); - } else { - send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true); - } - } - } else { - gccorestatus[cnum] = 0; - } // if(data4>0) - } // if(cnum < NUMCORES4GC) -} - -INLINE void processmsg_gcfinishmapinfo_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a map phase finish msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg -#ifndef CLOSE_PRINT - 
BAMBOO_DEBUGPRINT_REG(data1); -#endif - BAMBOO_EXIT(0xe018); - } - // all cores should do flush - if(data1 < NUMCORES4GC) { - gccorestatus[data1] = 0; - } -} - - -INLINE void processmsg_gcfinishflush_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a flush phase finish msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data1); -#endif - BAMBOO_EXIT(0xe019); - } - // all cores should do flush - if(data1 < NUMCORESACTIVE) { - gccorestatus[data1] = 0; - } -} - -INLINE void processmsg_gcmarkconfirm_I() { - if((BAMBOO_NUM_OF_CORE == STARTUPCORE) - || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) { - // wrong core to receive such msg - BAMBOO_EXIT(0xe01a); - } else { - // send response msg, cahce the msg first - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, - gcbusystatus, gcself_numsendobjs, - gcself_numreceiveobjs); - } else { - send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, - gcbusystatus, gcself_numsendobjs, - gcself_numreceiveobjs, true); - } - } -} - -INLINE void processmsg_gcmarkreport_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data3 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data4 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a marked phase finish confirm response msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // wrong core to receive such msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data2); -#endif - BAMBOO_EXIT(0xe01b); - } else { - int entry_index = 0; - if(waitconfirm) { - // phse 2 - numconfirm--; - entry_index = (gcnumsrobjs_index == 0) ? 
1 : 0; - } else { - // can never reach here - // phase 1 - entry_index = gcnumsrobjs_index; - } - gccorestatus[data1] = data2; - gcnumsendobjs[entry_index][data1] = data3; - gcnumreceiveobjs[entry_index][data1] = data4; - } -} - -INLINE void processmsg_gcmarkedobj_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a markedObj msg - if(((int *)data1)[6] == INIT) { - // this is the first time that this object is discovered, - // set the flag as DISCOVERED - ((int *)data1)[6] = DISCOVERED; - gc_enqueue_I(data1); - } - // set the remote flag - ((int *)data1)[6] |= REMOTEM; - gcself_numreceiveobjs++; - gcbusystatus = true; -} - -INLINE void processmsg_gcmovestart_I() { - gctomove = true; - gcdstcore = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[1]; - gcmovestartaddr = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[2]; - gcblock2fill = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[3]; -} - -INLINE void processmsg_gcmaprequest_I() { - void * dstptr = NULL; - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); -#ifdef LOCALHASHTBL_TEST - RuntimeHashget(gcpointertbl, data1, &dstptr); -#else - dstptr = mgchashSearch(gcpointertbl, data1); -#endif - if(NULL == dstptr) { - // no such pointer in this core, something is wrong -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data1); - BAMBOO_DEBUGPRINT_REG(data2); -#endif - BAMBOO_EXIT(0xe01c); - } else { - // send back the mapping info, cache the msg first - if(BAMBOO_CHECK_SEND_MODE()) { - cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr); - } else { - send_msg_3(data2, GCMAPINFO, data1, (int)dstptr, true); - } - } -} - -INLINE void processmsg_gcmapinfo_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - gcmappedobj = msgdata[msgdataindex]; // [2] - MSG_INDEXINC_I(); -#ifdef LOCALHASHTBL_TEST - RuntimeHashadd_I(gcpointertbl, data1, gcmappedobj); -#else - mgchashInsert_I(gcpointertbl, data1, gcmappedobj); -#endif - 
if(data1 == gcobj2map) { - gcismapped = true; - } -} - -INLINE void processmsg_gcmaptbl_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - gcrpointertbls[data2] = (mgcsharedhashtbl_t *)data1; -} - -INLINE void processmsg_gclobjinfo_I() { - numconfirm--; - - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) { -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data2); -#endif - BAMBOO_EXIT(0xe01d); - } - // store the mark result info - int cnum = data2; - gcloads[cnum] = msgdata[msgdataindex]; - MSG_INDEXINC_I(); // msgdata[3]; - int data4 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - if(gcheaptop < data4) { - gcheaptop = data4; - } - // large obj info here - for(int k = 5; k < data1; k+=2) { - int lobj = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[k++]; - int length = msgdata[msgdataindex]; - MSG_INDEXINC_I(); //msgdata[k++]; - gc_lobjenqueue_I(lobj, length, cnum); - gcnumlobjs++; - } // for(int k = 5; k < msgdata[1];) -} - -INLINE void processmsg_gclobjmapping_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); -#ifdef LOCALHASHTBL_TEST - RuntimeHashadd_I(gcpointertbl, data1, data2); -#else - mgchashInsert_I(gcpointertbl, data1, data2); -#endif - mgcsharedhashInsert_I(gcsharedptbl, data1, data2); -} - -#ifdef GC_PROFILE -INLINE void processmsg_gcprofiles_I() { - int data1 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data2 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - int data3 = msgdata[msgdataindex]; - MSG_INDEXINC_I(); - gc_num_obj += data1; - gc_num_liveobj += data2; - gc_num_forwardobj += data3; - gc_num_profiles--; -} -#endif // GC_PROFILE - -#ifdef GC_CACHE_ADAPT -INLINE void processmsg_gcstartpref_I() { - gcphase = PREFINISHPHASE; -} - -INLINE void processmsg_gcfinishpref_I() { - int data1 = 
msgdata[msgdataindex]; - MSG_INDEXINC_I(); - // received a flush phase finish msg - if(BAMBOO_NUM_OF_CORE != STARTUPCORE) { - // non startup core can not receive this msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT_REG(data1); -#endif - BAMBOO_EXIT(0xe01e); - } - // all cores should do flush - if(data1 < NUMCORESACTIVE) { - gccorestatus[data1] = 0; - } -} -#endif // GC_CACHE_ADAPT -#endif // #ifdef MULTICORE_GC - -// receive object transferred from other cores -// or the terminate message from other cores -// Should be invoked in critical sections!! -// NOTICE: following format is for threadsimulate version only -// RAW version please see previous description -// format: type + object -// type: -1--stall msg -// !-1--object -// return value: 0--received an object -// 1--received nothing -// 2--received a Stall Msg -// 3--received a lock Msg -// RAW version: -1 -- received nothing -// otherwise -- received msg type -int receiveObject(int send_port_pending) { -#ifdef PROFILE_INTERRUPT - if(!interruptInfoOverflow) { - InterruptInfo* intInfo = RUNMALLOC_I(sizeof(struct interrupt_info)); - interruptInfoArray[interruptInfoIndex] = intInfo; - intInfo->startTime = BAMBOO_GET_EXE_TIME(); - intInfo->endTime = -1; - } -#endif -msg: - // get the incoming msgs - if(receiveMsg(send_port_pending) == -1) { - return -1; - } -processmsg: - // processing received msgs - int size = 0; - MSG_REMAINSIZE_I(&size); - if((size == 0) || (checkMsgLength_I(size) == -1)) { - // not a whole msg - // have new coming msg - if((BAMBOO_MSG_AVAIL() != 0) && !msgdatafull) { - goto msg; - } else { - return -1; - } - } - - if(msglength <= size) { - // have some whole msg - MSGTYPE type; - type = msgdata[msgdataindex]; //[0] - MSG_INDEXINC_I(); - msgdatafull = false; - switch(type) { - case TRANSOBJ: { - // receive a object transfer msg - processmsg_transobj_I(); - break; - } // case TRANSOBJ - - case TRANSTALL: { - // receive a stall msg - processmsg_transtall_I(); - break; - } // case TRANSTALL - -// GC 
version have no lock msgs -#ifndef MULTICORE_GC - case LOCKREQUEST: { - // receive lock request msg, handle it right now - processmsg_lockrequest_I(); - break; - } // case LOCKREQUEST - - case LOCKGROUNT: { - // receive lock grount msg - processmsg_lockgrount_I(); - break; - } // case LOCKGROUNT - - case LOCKDENY: { - // receive lock deny msg - processmsg_lockdeny_I(); - break; - } // case LOCKDENY - - case LOCKRELEASE: { - processmsg_lockrelease_I(); - break; - } // case LOCKRELEASE -#endif // #ifndef MULTICORE_GC - -#ifdef PROFILE - case PROFILEOUTPUT: { - // receive an output profile data request msg - processmsg_profileoutput_I(); - break; - } // case PROFILEOUTPUT - - case PROFILEFINISH: { - // receive a profile output finish msg - processmsg_profilefinish_I(); - break; - } // case PROFILEFINISH -#endif // #ifdef PROFILE - -// GC version has no lock msgs -#ifndef MULTICORE_GC - case REDIRECTLOCK: { - // receive a redirect lock request msg, handle it right now - processmsg_redirectlock_I(); - break; - } // case REDIRECTLOCK - - case REDIRECTGROUNT: { - // receive a lock grant msg with redirect info - processmsg_redirectgrount_I(); - break; - } // case REDIRECTGROUNT - - case REDIRECTDENY: { - // receive a lock deny msg with redirect info - processmsg_redirectdeny_I(); - break; - } // case REDIRECTDENY - - case REDIRECTRELEASE: { - // receive a lock release msg with redirect info - processmsg_redirectrelease_I(); - break; - } // case REDIRECTRELEASE -#endif // #ifndef MULTICORE_GC - - case STATUSCONFIRM: { - // receive a status confirm info - processmsg_statusconfirm_I(); - break; - } // case STATUSCONFIRM - - case STATUSREPORT: { - processmsg_statusreport_I(); - break; - } // case STATUSREPORT - - case TERMINATE: { - // receive a terminate msg - processmsg_terminate_I(); - break; - } // case TERMINATE - - case MEMREQUEST: { - processmsg_memrequest_I(); - break; - } // case MEMREQUEST - - case MEMRESPONSE: { - processmsg_memresponse_I(); - break; - } // case 
MEMRESPONSE - -#ifdef MULTICORE_GC - // GC msgs - case GCSTARTPRE: { - processmsg_gcstartpre_I(); - break; - } // case GCSTARTPRE - - case GCSTARTINIT: { - processmsg_gcstartinit_I(); - break; - } // case GCSTARTINIT - - case GCSTART: { - // receive a start GC msg - processmsg_gcstart_I(); - break; - } // case GCSTART - - case GCSTARTCOMPACT: { - // a compact phase start msg - processmsg_gcstartcompact_I(); - break; - } // case GCSTARTCOMPACT - - case GCSTARTMAPINFO: { - // received a flush phase start msg - processmsg_gcstartmapinfo_I(); - break; - } // case GCSTARTFLUSH - - case GCSTARTFLUSH: { - // received a flush phase start msg - processmsg_gcstartflush_I(); - break; - } // case GCSTARTFLUSH - - case GCFINISHPRE: { - processmsg_gcfinishpre_I(); - break; - } // case GCFINISHPRE - - case GCFINISHINIT: { - processmsg_gcfinishinit_I(); - break; - } // case GCFINISHINIT - - case GCFINISHMARK: { - processmsg_gcfinishmark_I(); - break; - } // case GCFINISHMARK - - case GCFINISHCOMPACT: { - // received a compact phase finish msg - processmsg_gcfinishcompact_I(); - break; - } // case GCFINISHCOMPACT - - case GCFINISHMAPINFO: { - processmsg_gcfinishmapinfo_I(); - break; - } // case GCFINISHMAPINFO - - case GCFINISHFLUSH: { - processmsg_gcfinishflush_I(); - break; - } // case GCFINISHFLUSH - - case GCFINISH: { - // received a GC finish msg - gcphase = FINISHPHASE; - break; - } // case GCFINISH - - case GCMARKCONFIRM: { - // received a marked phase finish confirm request msg - // all cores should do mark - processmsg_gcmarkconfirm_I(); - break; - } // case GCMARKCONFIRM - - case GCMARKREPORT: { - processmsg_gcmarkreport_I(); - break; - } // case GCMARKREPORT - - case GCMARKEDOBJ: { - processmsg_gcmarkedobj_I(); - break; - } // case GCMARKEDOBJ - - case GCMOVESTART: { - // received a start moving objs msg - processmsg_gcmovestart_I(); - break; - } // case GCMOVESTART - - case GCMAPREQUEST: { - // received a mapping info request msg - processmsg_gcmaprequest_I(); - break; 
- } // case GCMAPREQUEST - - case GCMAPINFO: { - // received a mapping info response msg - processmsg_gcmapinfo_I(); - break; - } // case GCMAPINFO - - case GCMAPTBL: { - // received a mapping tbl response msg - processmsg_gcmaptbl_I(); - break; - } // case GCMAPTBL - - case GCLOBJREQUEST: { - // received a large objs info request msg - transferMarkResults_I(); - break; - } // case GCLOBJREQUEST - - case GCLOBJINFO: { - // received a large objs info response msg - processmsg_gclobjinfo_I(); - break; - } // case GCLOBJINFO - - case GCLOBJMAPPING: { - // received a large obj mapping info msg - processmsg_gclobjmapping_I(); - break; - } // case GCLOBJMAPPING - -#ifdef GC_PROFILE - case GCPROFILES: { - // received a gcprofiles msg - processmsg_gcprofiles_I(); - break; - } -#endif // GC_PROFILE - -#ifdef GC_CACHE_ADAPT - case GCSTARTPREF: { - // received a gcstartpref msg - processmsg_gcstartpref_I(); - break; - } - - case GCFINISHPREF: { - // received a gcfinishpref msg - processmsg_gcfinishpref_I(); - break; - } -#endif // GC_CACHE_ADAPT -#endif // #ifdef MULTICORE_GC - - default: - break; - } // switch(type) - msglength = BAMBOO_MSG_BUF_LENGTH; - - if((msgdataindex != msgdatalast) || (msgdatafull)) { - // still have available msg - goto processmsg; - } -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe88d); -#endif - - // have new coming msg - if(BAMBOO_MSG_AVAIL() != 0) { - goto msg; - } // TODO - -#ifdef PROFILE_INTERRUPT - if(!interruptInfoOverflow) { - interruptInfoArray[interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME(); - interruptInfoIndex++; - if(interruptInfoIndex == INTERRUPTINFOLENGTH) { - interruptInfoOverflow = true; - } - } -#endif - return (int)type; - } else { - // not a whole msg -#ifndef CLOSE_PRINT - BAMBOO_DEBUGPRINT(0xe88e); -#endif - return -2; - } -} - int enqueuetasks(struct parameterwrapper *parameter, struct parameterwrapper *prevptr, struct ___Object___ *ptr, diff --git a/Robust/src/buildscript b/Robust/src/buildscript index 469552ca..888a2e5a 
100755 --- a/Robust/src/buildscript +++ b/Robust/src/buildscript @@ -55,6 +55,9 @@ echo -dsmcaching -enable caching in dsm runtime echo echo BAMBOO Multicore options echo -scheduling do task scheduling +echo "-distributioninfo execute to collect distribution info for simulated annealing in multi-core version" +echo "-disall execute to collect whole distribution" +echo "-disstart specify the start number of distribution information collection" echo -multicore generate multi-core version binary echo "-numcore set the number of cores (should be used together with -multicore), defaultly set as 1" echo "-cacheflush enable cache flush in raw version binary (should be used togethere with -raw)" @@ -68,6 +71,12 @@ echo "-tilera_zlinux generate tilera version binary for Zero-Overhead Linux with echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera)" echo "-raw generate raw version binary (should be used together with -multicore)" echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)" +echo "-tilera_memprof build the memprof version (should be used together with -tilera_xx) " +echo -accurateprofile build with accurate profile information including pre/post task processing info +echo -profile_interrupt build with profile information of interrupts +echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" +echo +echo Multicore GC options echo -multicoregc generate multi-core binary with garbage collection echo "-numcore4gc set the number of cores for gc (should be used together with -multicoregc), defaultly set as 0" echo "-gcmem_local set the gc shared memory allocation strategy as local (should be used together with -multicoregc)" @@ -83,10 +92,7 @@ echo "-gclargepagesize set the gc shared memory to use large page size (should b echo "-gclargesharedheap(2) set the gc shared memory as large (should be used together with 
-multicoregc)" echo "-gccacheadapt setup as cacheadaptable mode (should be used together with -multicoregc)" echo -gcprofile build with gcprofile options -echo "-tilera_memprof build the memprof version (should be used together with -tilera_xx) " -echo -accurateprofile build with accurate profile information including pre/post task processing info -echo -profile_interrupt build with profile information of interrupts -echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" +echo -mgc generate Multicore GC binary without task stuff echo echo Other options echo -abcclose turnoff array boundary checks @@ -126,9 +132,6 @@ echo -instructionfailures inject code for instructionfailures echo -profile build with profile options echo "-enable-assertions execute assert statements during compilation" echo -justanalyze exit after compiler analyses complete -echo "-distributioninfo execute to collect distribution info for simulated annealing in multi-core version" -echo "-disall execute to collect whole distribution" -echo "-disstart specify the start number of distribution information collection" echo -assembly generate assembly echo -recovery compile recovery code echo -dsmtask support work and task class library @@ -198,6 +201,7 @@ GCCACHEADAPTFLAG=false GCCACHEADAPTPOLICYFLAG=false GCCACHEADAPTPOLICY='' GCCACHESAMPLINGFLAG=false +MGCFLAG=false USEDMALLOC=false THREADFLAG=false FASTCHECK=false @@ -493,6 +497,9 @@ shift elif [[ $1 = '-gccachesampling' ]] then GCCACHESAMPLINGFLAG=true +elif [[ $1 = '-mgc' ]] +then +MGCFLAG=true elif [[ $1 = '-dmalloc' ]] then USEDMALLOC=true @@ -832,6 +839,8 @@ cp ../Runtime/*.s ./ mkdir ./coreprof cp ../Runtime/coreprof/*.c ./coreprof/ cp ../Runtime/coreprof/*.h ./coreprof/ +cp $BAMBOORUNTIME/*.c ./ +cp $BAMBOORUNTIME/*.h ./ cp $BAMBOORUNTIME/RAW/*.c ./ cp $BAMBOORUNTIME/RAW/*.h ./ cp $BAMBOORUNTIME/RAW/*.S ./ @@ -921,6 +930,11 @@ then #INTERRUPT version 
TILERACFLAGS="${TILERACFLAGS} -DINTERRUPT" fi #INTERRUPT version +if $MGCFLAG +then #MGCFLAG +TILERACFLAGS="${TILERACFLAGS} -DMGC" +fi + if $MULTICOREGCFLAG then #MULTICOREGC version TILERACFLAGS="${TILERACFLAGS} -DMULTICORE_GC -D${GCCORES}" @@ -1054,16 +1068,18 @@ cp ../Runtime/coreprof/coreprof.c ./coreprof/ cp ../Runtime/coreprof/coreprof.h ./coreprof/ cp $BAMBOORUNTIME/multicoretask.c ./ cp $BAMBOORUNTIME/multicoreruntime.c ./ -cp $BAMBOORUNTIME/GCSharedHash.c ./ +cp $BAMBOORUNTIME/multicoremem.c ./ cp $BAMBOORUNTIME/multicoregarbage.c ./ +cp $BAMBOORUNTIME/GCSharedHash.c ./ cp $BAMBOORUNTIME/MGCHash.c ./ cp $BAMBOORUNTIME/multicoreruntime.h ./ -cp $BAMBOORUNTIME/GCSharedHash.h ./ -cp $BAMBOORUNTIME/multicoregc.h ./ +cp $BAMBOORUNTIME/multicoremem.h ./ cp $BAMBOORUNTIME/multicoregarbage.h ./ +cp $BAMBOORUNTIME/multicorecache.h ./ +cp $BAMBOORUNTIME/multicoregc.h ./ cp $BAMBOORUNTIME/multicorehelper.h ./ +cp $BAMBOORUNTIME/GCSharedHash.h ./ cp $BAMBOORUNTIME/MGCHash.h ./ -cp $BAMBOORUNTIME/multicorecache.h ./ cp ../Tilera/Runtime/*.c ./ cp ../Tilera/Runtime/*.h ./ cp ../Tilera/Runtime/$TILERA_INDIR/*.c ./