From 14a9a9160ea9ad6d61f139b8b34d7e75cc80e689 Mon Sep 17 00:00:00 2001
From: jzhou <jzhou>
Date: Wed, 13 May 2009 23:14:44 +0000
Subject: [PATCH] Updates to support the multicore runtime on Tilera. Also
 fix a bug in BuildCodeMultiCore.java: it wrongly omitted the code that
 repeatedly transfers objects to the same core. In the current scheduling,
 sending 4 objs to 2 cores may be specified as an array [0,0,1,1] giving the
 target core for each object. Originally the array was generated as
 [0,0,1,null], which caused objects to be lost.

---
 Robust/src/IR/Flat/BuildCodeMultiCore.java | 26 +++++---
 Robust/src/Runtime/mem.c                   | 11 +++-
 Robust/src/Runtime/mem.h                   |  3 +-
 Robust/src/Runtime/multicoreruntime.h      | 22 ++++---
 Robust/src/Runtime/multicoretask.c         | 77 +++++++++++++++++++++-
 5 files changed, 115 insertions(+), 24 deletions(-)

diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java
index c6dabfc5..d058c95f 100644
--- a/Robust/src/IR/Flat/BuildCodeMultiCore.java
+++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java
@@ -1048,7 +1048,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	  output.println("}");
 	}
 
-	Vector<TranObjInfo> sendto = new Vector<TranObjInfo>();
+	//Vector<TranObjInfo> sendto = new Vector<TranObjInfo>();
 	Queue<Integer> queue = null;
 	if(targetCoreTbl != null) {
 	  queue = targetCoreTbl.get(tmpFState);
@@ -1106,7 +1106,9 @@ public class BuildCodeMultiCore extends BuildCode {
 		} else {
 		  tmpinfo.fs = tmpFState;
 		}
-		if(!contains(sendto, tmpinfo)) {
+		// fixed 05/12/09: as scheduled, an object is very likely to be sent to the
+		// same core repeatedly
+		//if(!contains(sendto, tmpinfo)) {
 		  qinfo = outputtransqueues(tmpinfo.fs, targetcore, output);
 		  output.println("tmpObjInfo = RUNMALLOC(sizeof(struct transObjInfo));");
 		  output.println("tmpObjInfo->objptr = (void *)" + tmpinfo.name + ";");
@@ -1114,8 +1116,8 @@ public class BuildCodeMultiCore extends BuildCode {
 		  output.println("tmpObjInfo->queues = " + qinfo.qname + ";");
 		  output.println("tmpObjInfo->length = " + qinfo.length + ";");
 		  output.println("addNewItem(totransobjqueue, (void*)tmpObjInfo);");
-		  sendto.add(tmpinfo);
-		}
+		  //sendto.add(tmpinfo);
+		//}
 		output.println("}");
 	      }
 	      output.println("break;");
@@ -1142,7 +1144,9 @@ public class BuildCodeMultiCore extends BuildCode {
 	    } else {
 	      tmpinfo.fs = tmpFState;
 	    }
-	    if(!contains(sendto, tmpinfo)) {
+		// fixed 05/12/09: as scheduled, an object is very likely to be sent to the
+		// same core repeatedly
+	    //if(!contains(sendto, tmpinfo)) {
 	      qinfo = outputtransqueues(tmpinfo.fs, targetcore, output);
 	      output.println("tmpObjInfo = RUNMALLOC(sizeof(struct transObjInfo));");
 	      output.println("tmpObjInfo->objptr = (void *)" + tmpinfo.name + ";");
@@ -1150,8 +1154,8 @@ public class BuildCodeMultiCore extends BuildCode {
 	      output.println("tmpObjInfo->queues = " + qinfo.qname + ";");
 	      output.println("tmpObjInfo->length = " + qinfo.length + ";");
 	      output.println("addNewItem(totransobjqueue, (void*)tmpObjInfo);");
-	      sendto.add(tmpinfo);
-	    }
+	      //sendto.add(tmpinfo);
+	    //}
 	    output.println("}");
 	  }
 	  output.println("/* increase index*/");
@@ -1196,7 +1200,9 @@ public class BuildCodeMultiCore extends BuildCode {
 	    } else {
 	      tmpinfo.fs = tmpFState;
 	    }
-	    if(!contains(sendto, tmpinfo)) {
+		// fixed 05/12/09: as scheduled, an object is very likely to be sent to the
+		// same core repeatedly
+	    //if(!contains(sendto, tmpinfo)) {
 	      qinfo = outputtransqueues(tmpinfo.fs, targetcores.elementAt(i), output);
 	      output.println("tmpObjInfo = RUNMALLOC(sizeof(struct transObjInfo));");
 	      output.println("tmpObjInfo->objptr = (void *)" + tmpinfo.name + ";");
@@ -1204,8 +1210,8 @@ public class BuildCodeMultiCore extends BuildCode {
 	      output.println("tmpObjInfo->queues = " + qinfo.qname + ";");
 	      output.println("tmpObjInfo->length = " + qinfo.length + ";");
 	      output.println("addNewItem(totransobjqueue, (void*)tmpObjInfo);");
-	      sendto.add(tmpinfo);
-	    }
+	      //sendto.add(tmpinfo);
+	    //}
 	    output.println("}");
 	    //}
 	  }
diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c
index 5ef956b0..04c8115a 100644
--- a/Robust/src/Runtime/mem.c
+++ b/Robust/src/Runtime/mem.c
@@ -14,6 +14,15 @@
    }*/
 
 void * mycalloc(int m, int size) {
+  void * p = NULL;
+  int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
+  BAMBOO_START_CRITICAL_SECTION_MEM();
+  p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
+  BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+  return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
+}
+
+void * mycalloc_share(int m, int size) {
   void * p = NULL;
   int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
   BAMBOO_START_CRITICAL_SECTION_MEM();
@@ -25,7 +34,7 @@ void * mycalloc(int m, int size) {
 void * mycalloc_i(int m, int size) {
   void * p = NULL;
   int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
-  p = BAMBOO_SHARE_MEM_CALLOC(m, isize); // calloc(m, isize);
+  p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
 }
 
diff --git a/Robust/src/Runtime/mem.h b/Robust/src/Runtime/mem.h
index 954019ee..8e033a78 100644
--- a/Robust/src/Runtime/mem.h
+++ b/Robust/src/Runtime/mem.h
@@ -24,9 +24,10 @@
 //#define PTR(x) (x)
 #else
 void * mycalloc(int m, int size);
+void * mycalloc_share(int m, int size);
 void * mycalloc_i(int m, int size);
 void myfree(void * ptr);
-#define FREEMALLOC(x) mycalloc(1,x)
+#define FREEMALLOC(x) mycalloc_share(1,x)
 #define RUNMALLOC(x) mycalloc(1,x) // handle interruption inside
 #define RUNMALLOC_I(x) mycalloc_i(1,x) // with interruption blocked beforehand
 #define RUNFREE(x) myfree(x);
diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h
index 164a1b8d..e6bf0c63 100644
--- a/Robust/src/Runtime/multicoreruntime.h
+++ b/Robust/src/Runtime/multicoreruntime.h
@@ -95,16 +95,16 @@ inline void initCommunication(void) __attribute__((always_inline));
 inline void fakeExecution(void) __attribute__((always_inline));
 inline void terminate(void) __attribute__((always_inline));
 
-inline void send_msg_1(int targetcore, int n0) __attribute__((always_inline));
-inline void send_msg_2(int targetcore, int n0, int n1) __attribute__((always_inline));
-inline void send_msg_3(int targetcore, int n0, int n1, int n2) __attribute__((always_inline));
-inline void send_msg_4(int targetcore, int n0, int n1, int n2, int n3) __attribute__((always_inline));
-inline void send_msg_5(int targetcore, int n0, int n1, int n2, int n3, int n4) __attribute__((always_inline));
-inline void send_msg_6(int targetcore, int n0, int n1, int n2, int n3, int n4, int n5) __attribute__((always_inline));
-inline void cache_msg_2(int targetcore, int n0, int n1) __attribute__((always_inline));
-inline void cache_msg_3(int targetcore, int n0, int n1, int n2) __attribute__((always_inline));
-inline void cache_msg_4(int targetcore, int n0, int n1, int n2, int n3) __attribute__((always_inline));
-inline void cache_msg_6(int targetcore, int n0, int n1, int n2, int n3, int n4, int n5) __attribute__((always_inline));
+inline void send_msg_1(int targetcore, unsigned long n0) __attribute__((always_inline));
+inline void send_msg_2(int targetcore, unsigned long n0, unsigned long n1) __attribute__((always_inline));
+inline void send_msg_3(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2) __attribute__((always_inline));
+inline void send_msg_4(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3) __attribute__((always_inline));
+inline void send_msg_5(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4) __attribute__((always_inline));
+inline void send_msg_6(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4, unsigned long n5) __attribute__((always_inline));
+inline void cache_msg_2(int targetcore, unsigned long n0, unsigned long n1) __attribute__((always_inline));
+inline void cache_msg_3(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2) __attribute__((always_inline));
+inline void cache_msg_4(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3) __attribute__((always_inline));
+inline void cache_msg_6(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4, unsigned long n5) __attribute__((always_inline));
 inline void transferObject(struct transObjInfo * transObj);
 inline int receiveMsg(void) __attribute__((always_inline));
 
@@ -122,6 +122,8 @@ inline void profileTaskEnd(void) __attribute__((always_inline));
 //  BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core               //
 //  BAMBOO_DEBUGPRINT(x): print out integer x                                      //
 //  BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                        //
+//  BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of whose   //
+//                                 size in bytes is y on local memory              //
 //  BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of whose   //
 //                                 size in bytes is y on shared memory             //
 //  BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                                      //
diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c
index 93a16e92..b47a6203 100644
--- a/Robust/src/Runtime/multicoretask.c
+++ b/Robust/src/Runtime/multicoretask.c
@@ -214,9 +214,12 @@ inline void run(void * arg) {
 		  while(!isEmpty(&objqueue)) {
 			  void * obj = NULL;
 			  BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE();
+#ifdef DEBUG
+			  BAMBOO_DEBUGPRINT(0xf001);
+#endif
 #ifdef PROFILE
 			  //isInterrupt = false;
-#endif  
+#endif 
 #ifdef DEBUG
 			  BAMBOO_DEBUGPRINT(0xeee1);
 #endif
@@ -260,8 +263,14 @@ inline void run(void * arg) {
 #ifdef DEBUG
 					  BAMBOO_DEBUGPRINT_REG(taskindex);
 					  BAMBOO_DEBUGPRINT_REG(paramindex);
+					  struct ___Object___ * tmpptr = (struct ___Object___ *)obj;
+	  tprintf("Process %x(%d): receive obj %x(%lld), ptrflag %x\n", corenum, corenum, (int)obj, (long)obj, tmpptr->flag);
 #endif
+
 					  enqueueObject_I(obj, queues, 1);
+#ifdef DEBUG
+	  BAMBOO_DEBUGPRINT_REG(hashsize(activetasks));
+#endif
 				  }
 				  removeItem(&objqueue, objitem);
 				  releasewritelock_I(obj);
@@ -277,9 +286,15 @@ inline void run(void * arg) {
 				  //isInterrupt = true;
 #endif
 				  BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
+#ifdef DEBUG
+				  BAMBOO_DEBUGPRINT(0xf000);
+#endif
 				  break;
 			  }
 			  BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
+#ifdef DEBUG
+			  BAMBOO_DEBUGPRINT(0xf000);
+#endif
 		  }
 #ifdef PROFILE
 		      if(isChecking) {
@@ -304,8 +319,12 @@ inline void run(void * arg) {
 						  (waitconfirm && (numconfirm == 0))) {
 #ifdef DEBUG
 					  BAMBOO_DEBUGPRINT(0xee04);
+					  BAMBOO_DEBUGPRINT_REG(waitconfirm);
 #endif
 					  BAMBOO_START_CRITICAL_SECTION_STATUS();
+#ifdef DEBUG
+					  BAMBOO_DEBUGPRINT(0xf001);
+#endif
 					  corestatus[corenum] = 0;
 					  numsendobjs[corenum] = self_numsendobjs;
 					  numreceiveobjs[corenum] = self_numreceiveobjs;
@@ -373,6 +392,9 @@ inline void run(void * arg) {
 								  // out progiling data
 #ifdef PROFILE
 								  BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+#ifdef DEBUG
+								  BAMBOO_DEBUGPRINT(0xf000);
+#endif
 								  for(i = 1; i < NUMCORES; ++i) {
 									  // send profile request msg to core i
 									  send_msg_2(i, 6, totalexetime);
@@ -381,6 +403,9 @@ inline void run(void * arg) {
 								  outputProfileData();
 								  while(true) {
 									  BAMBOO_START_CRITICAL_SECTION_STATUS();
+#ifdef DEBUG
+									  BAMBOO_DEBUGPRINT(0xf001);
+#endif
 									  profilestatus[corenum] = 0;
 									  // check the status of all cores
 									  allStall = true;
@@ -399,6 +424,9 @@ inline void run(void * arg) {
 									  if(!allStall) {
 										  int halt = 100;
 										  BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+#ifdef DEBUG
+										  BAMBOO_DEBUGPRINT(0xf000);
+#endif
 										  while(halt--) {
 										  }
 									  } else {
@@ -426,6 +454,9 @@ inline void run(void * arg) {
 						  numconfirm = 0;
 					  }
 					  BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+#ifdef DEBUG
+					  BAMBOO_DEBUGPRINT(0xf000);
+#endif
 				  }
 			  } else {
 				  if(!sendStall) {
@@ -941,7 +972,7 @@ foundtag:
 					}
 				}
 			}
-	
+
 			/* Check flags */
 			for(i=0; i<parameter->numberofterms; i++) {
 				int andmask=parameter->intarray[i*2];
@@ -1778,8 +1809,14 @@ bool getreadlock(void * ptr) {
     // reside on this core
     int deny = 0;
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf001);
+#endif
 	deny = processlockrequest(0, lock2require, (int)ptr, corenum, corenum, false);
 	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf000);
+#endif
     if(deny == -1) {
 		// redirected
 		return true;
@@ -1820,6 +1857,9 @@ void releasereadlock(void * ptr) {
 
   if(targetcore == corenum) {
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf001);
+#endif
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
@@ -1832,6 +1872,9 @@ void releasereadlock(void * ptr) {
       lockvalue->value--;
     }
 	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf000);
+#endif
     return;
   } else {
 	// send lock release msg
@@ -1932,8 +1975,14 @@ bool getwritelock(void * ptr) {
     // reside on this core
     int deny = 0;
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf001);
+#endif
 	deny = processlockrequest(1, lock2require, (int)ptr, corenum, corenum, false);
 	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf000);
+#endif
 #ifdef DEBUG
     BAMBOO_DEBUGPRINT(0xe555);
     BAMBOO_DEBUGPRINT_REG(lockresult);
@@ -1985,6 +2034,9 @@ void releasewritelock(void * ptr) {
 
   if(targetcore == corenum) {
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf001);
+#endif
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
@@ -1997,6 +2049,9 @@ void releasewritelock(void * ptr) {
       lockvalue->value++;
     }
 	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf000);
+#endif
     return;
   } else {
 	// send lock release msg
@@ -2019,6 +2074,9 @@ void releasewritelock_r(void * lock, void * redirectlock) {
 
   if(targetcore == corenum) {
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf001);
+#endif
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
@@ -2041,6 +2099,9 @@ void releasewritelock_r(void * lock, void * redirectlock) {
 #endif
     }
 	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+	BAMBOO_DEBUGPRINT(0xf000);
+#endif
     return;
   } else {
 	  // send lock release with redirect info msg
@@ -2618,6 +2679,9 @@ newtask:
 #endif
 		  getwritelock(lock);
 		  BAMBOO_START_CRITICAL_SECTION();
+#ifdef DEBUG
+		  BAMBOO_DEBUGPRINT(0xf001);
+#endif
 #ifdef PROFILE
 		  //isInterrupt = false;
 #endif 
@@ -2643,6 +2707,9 @@ newtask:
 		  //isInterrupt = true;
 #endif
 		  BAMBOO_CLOSE_CRITICAL_SECTION();
+#ifdef DEBUG
+		  BAMBOO_DEBUGPRINT(0xf000);
+#endif
 
 		  if(grount == 0) {
 			  int j = 0;
@@ -2682,6 +2749,9 @@ newtask:
 	BAMBOO_CACHE_FLUSH_RANGE((int)parameter, classsize[((struct ___Object___ *)parameter)->type]);
 	/*
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xf001);
+#endif
 	if(RuntimeHashcontainskey(objRedirectLockTbl, (int)parameter)) {
 		int redirectlock_r = 0;
 		RuntimeHashget(objRedirectLockTbl, (int)parameter, &redirectlock_r);
@@ -2689,6 +2759,9 @@ newtask:
 		RuntimeHashremovekey(objRedirectLockTbl, (int)parameter);
 	}
 	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xf000);
+#endif
 */
 #endif
 	tmpparam = (struct ___Object___ *)parameter;
-- 
2.34.1