From 673024d083ae4f552baa92db5a2b5e4ede41d054 Mon Sep 17 00:00:00 2001
From: jzhou <jzhou>
Date: Mon, 20 Sep 2010 00:37:27 +0000
Subject: [PATCH] Remove the boundary of cache access sampling. Add output of
 the changed cache policy of the shared pages. Also fix some bugs.

---
 Robust/src/Runtime/bamboo/multicorecache.h   |  1 -
 Robust/src/Runtime/bamboo/multicoregarbage.c | 78 ++++++++++++++------
 Robust/src/Runtime/bamboo/multicoretask.c    |  4 +-
 Robust/src/buildscript                       |  9 +++
 4 files changed, 66 insertions(+), 26 deletions(-)

diff --git a/Robust/src/Runtime/bamboo/multicorecache.h b/Robust/src/Runtime/bamboo/multicorecache.h
index 2c56825b..dc5daac7 100644
--- a/Robust/src/Runtime/bamboo/multicorecache.h
+++ b/Robust/src/Runtime/bamboo/multicorecache.h
@@ -5,7 +5,6 @@
 #ifdef GC_CACHE_ADAPT
 #define GC_CACHE_SAMPLING_UNIT 100000000
 #define GC_TILE_TIMER_EVENT_SETTING 100000000  
-#define GC_NUM_SAMPLING 24
 
 // should be consistent with multicoreruntime.h
 typedef union
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.c b/Robust/src/Runtime/bamboo/multicoregarbage.c
index b5897919..f277a6d1 100644
--- a/Robust/src/Runtime/bamboo/multicoregarbage.c
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.c
@@ -395,6 +395,13 @@ inline int hostcore(void * ptr) {
   return host;
 } // int hostcore(void * ptr)
 
+inline void cpu2coords(int coren, int * x, int * y) {
+  // each core owns two consecutive table slots: x first, then y
+  const int slot = 2*coren;
+  *x = bamboo_cpu2coords[slot];
+  *y = bamboo_cpu2coords[slot+1];
+} // void cpu2coords(...)
+
 inline bool isLocal(void * ptr) {
   // check if a pointer is in shared heap on this core
   return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
@@ -1958,8 +1965,8 @@ innermoveobj:
 	  ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va))/
 	  ((float)(BAMBOO_PAGE_SIZE));
 	for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-	  ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
-		 )[gc_cache_revise_infomation.to_page_index] += (int)(
+	  ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
+		gc_cache_revise_infomation.to_page_index] += (int)(
 		   ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
 		gc_cache_revise_infomation.orig_page_index]*tmp_factor);
 	  // TODO
@@ -2044,13 +2051,13 @@ innermoveobj:
 		((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va))/
 		((float)(BAMBOO_PAGE_SIZE));
 	  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-		((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
-		 )[gc_cache_revise_infomation.to_page_index] += (int)(
+		((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
+		  gc_cache_revise_infomation.to_page_index] += (int)(
 		  ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
 		  gc_cache_revise_infomation.orig_page_index]*tmp_factor);
 		// TODO
-/*	  if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
-		tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
+	  /*if((gc_cache_revise_infomation.to_page_index*(BAMBOO_PAGE_SIZE)+gcbaseva) == 0x10f10000) {
+		tprintf("0x10f10000 <- %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.orig_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
 	  }*/
 	  }
 	  // prepare for an new to page
@@ -2127,13 +2134,13 @@ innermoveobj:
 		((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va))/
 		((float)(BAMBOO_PAGE_SIZE));
 	  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-		((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
-		 )[gc_cache_revise_infomation.to_page_index] += (int)(
+		((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
+		  gc_cache_revise_infomation.to_page_index] += (int)(
 		  ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
 		  gc_cache_revise_infomation.orig_page_index]*tmp_factor);
       // TODO
-/*	  if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
-		tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
+	  /*if((gc_cache_revise_infomation.to_page_index*(BAMBOO_PAGE_SIZE)+gcbaseva) == 0x10f10000) {
+		tprintf("0x10f10000 <- %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.orig_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
 	  }*/
 	  }
 	  // prepare for an new to page
@@ -2250,10 +2257,14 @@ innercompact:
 	  ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va))/
 	  ((float)(BAMBOO_PAGE_SIZE));
 	for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-	  ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
-	   )[gc_cache_revise_infomation.to_page_index] += (int)(
+	  ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r))[
+		gc_cache_revise_infomation.to_page_index] += (int)(
 		((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
 		gc_cache_revise_infomation.orig_page_index]*tmp_factor);
+	  // TODO
+	  /*if((gc_cache_revise_infomation.to_page_index*(BAMBOO_PAGE_SIZE)+gcbaseva) == 0x10f10000) {
+		tprintf("0x10f10000 <- %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.orig_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
+	  }*/
 	}
 #endif // GC_CACHE_ADAPT
   // if no objs have been compact, do nothing,
@@ -2418,7 +2429,7 @@ inline void compact() {
 	(BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
   gc_cache_revise_infomation.orig_page_index = 
 	orig->blockbase/(BAMBOO_PAGE_SIZE);
-#endif
+#endif // GC_CACHE_ADAPT
 
   int filledblocks = 0;
   INTPTR heaptopptr = 0;
@@ -2876,7 +2887,6 @@ void cacheAdapt_gc(bool isgccachestage) {
 
 // the master core decides how to adapt cache strategy for the mutator 
 // according to collected statistic data
-extern int gc_num_sampling;
 
 // make all pages hfh
 int cacheAdapt_policy_h4h(){
@@ -2941,7 +2951,7 @@ int cacheAdapt_policy_hotest(){
 
 	for(int i = 0; i < NUMCORESACTIVE; i++) {
 	  int * local_tbl = (int *)((void *)gccachesamplingtbl_r
-		  +page_num*sizeof(float)*i);
+		  +page_num*sizeof(int)*i);
 	  int freq = local_tbl[page_index];
 	  // TODO
 	  // check the freqency, decide if this page is hot for the core
@@ -2995,7 +3005,7 @@ int cacheAdapt_policy_dominate(){
 	
 	for(int i = 0; i < NUMCORESACTIVE; i++) {
 	  int * local_tbl = (int *)((void *)gccachesamplingtbl_r
-		  +page_num*sizeof(float)*i);
+		  +page_num*sizeof(int)*i);
 	  int freq = local_tbl[page_index];
 	  totalfreq += freq;
 	  // TODO
@@ -3097,7 +3107,7 @@ int cacheAdapt_policy_overload(){
 	
 	for(int i = 0; i < NUMCORESACTIVE; i++) {
 	  int * local_tbl = (int *)((void *)gccachesamplingtbl_r
-		  +page_num*sizeof(float)*i);
+		  +page_num*sizeof(int)*i);
 	  int freq = local_tbl[page_index];
 	  totalfreq += freq;
 	  // TODO
@@ -3106,6 +3116,10 @@ int cacheAdapt_policy_overload(){
 		hotfreq = freq;
 		hotestcore = i;
 	  }
+	  // TODO
+	  /*if(page_sva == 0x10f10000) {
+		if(freq != 0) tprintf("0x10f10000 core %d, %d\n", i, freq);
+	  }*/
 	}
 	// TODO
 	// Decide the cache strategy for this page
@@ -3135,6 +3149,10 @@ int cacheAdapt_policy_overload(){
 	core2heavypages[hotestcore][3*index+2] = totalfreq;
 	core2heavypages[hotestcore][3*index+1] = tmp_p-1;
 	core2heavypages[hotestcore][0]++;
+	// TODO
+	//if(page_sva == 0x10f10000) {
+	  //tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
+	//}
   }
 
   // Check the workload of each core
@@ -3190,7 +3208,7 @@ int cacheAdapt_policy_crowd(){
 	
 	for(int i = 0; i < NUMCORESACTIVE; i++) {
 	  int * local_tbl = (int *)((void *)gccachesamplingtbl_r
-		  +page_num*sizeof(float)*i);
+		  +page_num*sizeof(int)*i);
 	  int freq = local_tbl[page_index];
 	  totalfreq += freq;
 	  // TODO
@@ -3293,14 +3311,15 @@ inner_crowd:
 } // int cacheAdapt_policy_overload()
 
 void cacheAdapt_master() {
+  int numchanged = 0;
   // check the statistic data
   // for each page, decide the new cache strategy
-  //int numchanged = cacheAdapt_policy_h4h();
-  //int numchanged = cacheAdapt_policy_local();
-  //int numchanged = cacheAdapt_policy_hotest();
-  //int numchanged = cacheAdapt_policy_dominate();
-  int numchanged = cacheAdapt_policy_overload();
-  //int numchanged = cacheAdapt_policy_crowd();
+  numchanged = cacheAdapt_policy_h4h();
+  //numchanged = cacheAdapt_policy_local();
+  //numchanged = cacheAdapt_policy_hotest();
+  //numchanged = cacheAdapt_policy_dominate();
+  //numchanged = cacheAdapt_policy_overload();
+  //numchanged = cacheAdapt_policy_crowd();
   *gccachepolicytbl = numchanged;
   // TODO
   //if(numchanged > 0) tprintf("=================\n");
@@ -3882,6 +3901,9 @@ inline void gc_master(struct garbagelist * stackptr) {
 #endif
   // cache adapt phase
   cacheAdapt_mutator();
+#ifdef GC_CACHE_ADAPT_OUTPUT
+  bamboo_output_cache_policy();
+#endif
   cacheAdapt_gc(false);
   gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
   while(PREFINISHPHASE == gcphase) {
@@ -4024,10 +4046,12 @@ pregccheck:
 	}
 #endif
 #ifdef GC_CACHE_ADAPT
+#ifdef GC_CACHE_SAMPLING
     // disable the timer interrupt
     bamboo_mask_timer_intr();
     // get the sampling data 
     bamboo_output_dtlb_sampling();
+#endif // GC_CACHE_SAMPLING
 #endif // GC_CACHE_ADAPT
 	gcprocessing = true;
 	gc_master(stackptr);
@@ -4050,12 +4074,14 @@ pregccheck:
 	}
 #endif
 #ifdef GC_CACHE_ADAPT
+#ifdef GC_CACHE_SAMPLING
 	// disable the timer interrupt
 	bamboo_mask_timer_intr();
 	if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
 	  // get the sampling data 
 	  bamboo_output_dtlb_sampling();
 	}
+#endif // GC_CACHE_SAMPLING
 #endif // GC_CACHE_ADAPT
     gcprocessing = true;
     gc_collect(stackptr);
@@ -4085,12 +4111,14 @@ pregccheck:
 	}
 #endif
 #ifdef GC_CACHE_ADAPT
+#ifdef GC_CACHE_SAMPLING
 	// disable the timer interrupt
 	bamboo_mask_timer_intr();
 	if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
 	  // get the sampling data 
 	  bamboo_output_dtlb_sampling();
 	}
+#endif // GC_CACHE_SAMPLING
 #endif // GC_CACHE_ADAPT
     // not a gc core, should wait for gcfinish msg
     gcprocessing = true;
@@ -4104,6 +4132,7 @@ pregccheck:
     gcprocessing = false;
   }
 #ifdef GC_CACHE_ADAPT
+#ifdef GC_CACHE_SAMPLING
   // reset the sampling arrays
   bamboo_dtlb_sampling_reset();
   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
@@ -4118,6 +4147,7 @@ pregccheck:
   // enable the timer interrupt
   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); 
   bamboo_unmask_timer_intr();
+#endif // GC_CACHE_SAMPLING
 #endif // GC_CACHE_ADAPT
   return true;
 } // void gc(struct garbagelist * stackptr)
diff --git a/Robust/src/Runtime/bamboo/multicoretask.c b/Robust/src/Runtime/bamboo/multicoretask.c
index c2e55abe..da111ec9 100644
--- a/Robust/src/Runtime/bamboo/multicoretask.c
+++ b/Robust/src/Runtime/bamboo/multicoretask.c
@@ -378,9 +378,11 @@ void initruntimedata() {
 #ifdef GC_CACHE_ADAPT
   gccachestage = false;
   // enable the timer interrupt
-  bamboo_tile_timer_set_next_event(500000000); // TODO
+#ifdef GC_CACHE_SAMPLING
+  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); // TODO
   bamboo_unmask_timer_intr();
   bamboo_dtlb_sampling_process();
+#endif // GC_CACHE_SAMPLING
 #endif // GC_CACHE_ADAPT
 #else
   // create the lock table, lockresult table and obj queue
diff --git a/Robust/src/buildscript b/Robust/src/buildscript
index 334cd15d..f7609257 100755
--- a/Robust/src/buildscript
+++ b/Robust/src/buildscript
@@ -191,6 +191,7 @@ GCLARGESHAREDHEAPFLAG=false;
 GCSMALLPAGESIZEFLAG2=false;
 GCLARGESHAREDHEAPFLAG2=false;
 GCCACHEADAPTFLAG=false
+GCCACHESAMPLINGFLAG=false
 USEDMALLOC=false
 THREADFLAG=false
 FASTCHECK=false
@@ -475,6 +476,9 @@ GCLARGESHAREDHEAPFLAG2=true
 elif [[ $1 = '-gccacheadapt' ]]
 then
 GCCACHEADAPTFLAG=true
+elif [[ $1 = '-gccachesampling' ]]
+then
+GCCACHESAMPLINGFLAG=true
 elif [[ $1 = '-dmalloc' ]]
 then
 USEDMALLOC=true
@@ -976,6 +980,11 @@ then # GC_CACHE_ADAPT version
 TILERACFLAGS="${TILERACFLAGS} -DGC_CACHE_ADAPT"
 fi
 
+if $GCCACHESAMPLINGFLAG
+then # GC_CACHE_SAMPLING version (set by the -gccachesampling option)
+TILERACFLAGS="${TILERACFLAGS} -DGC_CACHE_SAMPLING"
+fi
+
 cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$MAKEFILE ./Makefile
 if $TILERABMEFLAG
 then # TILERABMEFLAG
-- 
2.34.1