task t1(StartupObject s{initialstate}) {
//System.printString("task t1\n");
- int threadnum = 56; // 62; // 56;
+ int threadnum = 62; // 56;
int size = threadnum * 25;
Composer comp = new Composer(threadnum, size){compose};
RayTracer rt = new RayTracer();
void * p = NULL;
int isize = size;
BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+#ifdef MULTICORE_GC
+ extern bool gc_localheap_s;
+inermycalloc_i:
+ p = gc_localheap_s ? BAMBOO_LOCAL_MEM_CALLOC_S(m, isize) :
+ BAMBOO_LOCAL_MEM_CALLOC(m, isize);
+#else
p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
+#endif
if(p == NULL) {
+#ifdef MULTICORE_GC
+ if(!gc_localheap_s) {
+ gc_localheap_s = true;
+ goto inermycalloc_i;
+ }
+#endif
BAMBOO_EXIT(0xc001);
}
BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
#ifdef DEBUG
tprintf("ask for local mem: %x \n", isize);
#endif
+#ifdef MULTICORE_GC
+ extern bool gc_localheap_s;
+inermycalloc_i:
+ p = gc_localheap_s ? BAMBOO_LOCAL_MEM_CALLOC_S(m, isize) :
+ BAMBOO_LOCAL_MEM_CALLOC(m, isize);
+#else
p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
+#endif
#ifdef DEBUG
tprintf("new obj in local mem: %x, %x \n", p, isize);
#endif
if(p == NULL) {
+#ifdef MULTICORE_GC
+ if(!gc_localheap_s) {
+ gc_localheap_s = true;
+ goto inermycalloc_i;
+ }
+#endif
BAMBOO_EXIT(0xc004);
}
return p;
}
// Free a block of local memory, choosing the free routine by address.
// Without MULTICORE_GC, ptr is always passed to BAMBOO_LOCAL_MEM_FREE.
// With MULTICORE_GC:
//   ptr >= BAMBOO_LOCAL_HEAP_START_VA    -> BAMBOO_LOCAL_MEM_FREE
//   ptr >= BAMBOO_LOCAL_HEAP_START_VA_S  -> BAMBOO_LOCAL_MEM_FREE_S
//   anything lower                       -> nothing is freed (silently ignored)
// NOTE(review): the two-step test only tells the heaps apart if
// BAMBOO_LOCAL_HEAP_START_VA lies above BAMBOO_LOCAL_HEAP_START_VA_S — confirm.
void myfree(void * ptr) {
- BAMBOO_LOCAL_MEM_FREE(ptr);
+#ifdef MULTICORE_GC
+ if(ptr >= BAMBOO_LOCAL_HEAP_START_VA ) {
+#endif
+ BAMBOO_LOCAL_MEM_FREE(ptr);
+#ifdef MULTICORE_GC
+ } else if(ptr >= BAMBOO_LOCAL_HEAP_START_VA_S) {
+ BAMBOO_LOCAL_MEM_FREE_S(ptr);
+ }
+#endif
return;
}
#ifdef RAWPATH // TODO GC_DEBUG
printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
//dumpSMem();
+#endif
+#ifdef GC_FLUSH_DTLB
+ if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
+ BAMBOO_CLEAN_DTLB();
+ gc_num_flush_dtlb++;
+ }
#endif
gcprocessing = true;
gcphase = INITPHASE;
gc_num_forwardobj = 0;
#endif // GC_PROFILE_S*/
} else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
+#ifdef GC_FLUSH_DTLB
+ if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
+ BAMBOO_CLEAN_DTLB();
+ gc_num_flush_dtlb++;
+ }
+#endif
gcprocessing = true;
gc_collect(stackptr);
gcflag = false;
gcprocessing = false;
} else {
+#ifdef GC_FLUSH_DTLB
+ if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
+ BAMBOO_CLEAN_DTLB();
+ gc_num_flush_dtlb++;
+ }
+#endif
// not a gc core, should wait for gcfinish msg
gcprocessing = true;
gc_nocollect(stackptr);
// let each gc core have one big block; this is very important
// for the computation of NUMBLOCKS(s, n), DO NOT change this!
+#ifdef GC_FLUSH_DTLB
// Upper bound on DTLB flushes: each flush site calls BAMBOO_CLEAN_DTLB() only
// while gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB.
+#define GC_NUM_FLUSH_DTLB 1
// Count of DTLB flushes performed so far; incremented right after each
// BAMBOO_CLEAN_DTLB() call and reset to 0 during initialization.
+int gc_num_flush_dtlb;
+#endif
+
#define NUMPTRS 100
// for GC profile
//((unsigned long long int)(3.0 * 1024 * 1024 * 1024)) // 3G
#endif // GC_DEBUG
+#ifdef MULTICORE_GC
// Selects the local allocator used by the calloc wrappers:
//   false -> BAMBOO_LOCAL_MEM_CALLOC, true -> BAMBOO_LOCAL_MEM_CALLOC_S.
// The wrappers set it to true when an allocation returns NULL while it is
// still false, then retry once before exiting with an error code.
// NOTE(review): the wrappers declare this as `extern bool gc_localheap_s;`
// without `volatile`; the qualifiers should match this definition — confirm.
+volatile bool gc_localheap_s;
+#endif
+
#ifdef MULTICORE_GC
#include "multicoregarbage.h"
// request response //
// BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of //
// whose size in bytes is y on local memory //
+// which is given by the hypervisor //
// BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory //
// BAMBOO_LOCAL_MEM_CLOSE(): close the local heap //
+// BAMBOO_LOCAL_MEM_CALLOC_S(x, y): allocate an array of x elements each of//
+// whose size in bytes is y on local //
+// memory which is not from the hypervisor//
+// but is allocated from the free memory //
+// BAMBOO_LOCAL_MEM_FREE_S(x): free space with ptr x on self-allocated //
+// local memory //
+// BAMBOO_LOCAL_MEM_CLOSE_S(): close the self-allocated local heap //
// BAMBOO_SHARE_MEM_CALLOC_I(x, y): allocate an array of x elements each of//
// whose size in bytes is y on shared memory//
-// BAMBOO_SHARE_MEM_CLOSE(): close the shared heap //
+// BAMBOO_SHARE_MEM_CLOSE(): close the shared heap //
// BAMBOO_CACHE_LINE_SIZE: the cache line size //
// BAMBOO_CACHE_LINE_MASK: mask for a cache line //
// BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with //
// hint, the processor will not fetch the //
// current content of the memory and directly //
// write //
+// BAMBOO_CLEAN_DTLB(): zero-out all the dtlb entries //
/////////////////////////////////////////////////////////////////////////////
#endif // #ifdef MULTICORE
gc_num_forwardobj = 0;
gc_num_profiles = NUMCORESACTIVE - 1;
#endif
+#ifdef GC_FLUSH_DTLB
+ gc_num_flush_dtlb = 0;
+#endif
+ gc_localheap_s = false;
#else
// create the lock table, lockresult table and obj queue
locktable.size = 20;
BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time);
//BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test
+#ifdef GC_FLUSH_DTLB
+ BAMBOO_DEBUGPRINT_REG(gc_num_flush_dtlb);
+#endif
#ifndef BAMBOO_MEMPROF
BAMBOO_DEBUGPRINT(0xbbbbbbbb);
#endif
EXITAFTERANALYSIS=false
ASSEMBLY=false
GCCORES=''
-GC1COREFLAG=false
TILERAN1COREFLAG=false
TILERA56COREFLAG=false
then
JAVAOPTS="$JAVAOPTS -numcore4gc $2"
GCCORES="GC_$2"
-if [[ "$2" -eq "1" ]]
-then
-GC1COREFLAG=true
-fi
shift
elif [[ $1 = '-raw' ]]
then
MAKEFILE="Makefile.tilera.$TILERACONFIG"
SIMHVC="sim.hvc.$TILERACONFIG"
PCIHVC="pci.hvc.$TILERACONFIG"
-if $GC1COREFLAG
-then # 1-core gc
- if $TILERAN1COREFLAG
- then # not only with 1 core
- PCIHVC="$PCIHVC.1gc"
- fi
-fi
if $TILERA56COREFLAG
then
PCIHVC="$PCIHVC.56"