From 4ee5c456c36d34fa6f6ae88750f29eb41f6daa24 Mon Sep 17 00:00:00 2001
From: jzhou <jzhou>
Date: Wed, 17 Mar 2010 00:03:17 +0000
Subject: [PATCH] code clean: define TILERA_BME mode and TILERA_ZLINUX mode. 
 TILERA_ZLINUX mode is not tested yet

---
 Robust/src/IR/Flat/BuildCodeMultiCore.java |  16 +--
 Robust/src/Runtime/RAW/runtime_arch.h      |   1 -
 Robust/src/Runtime/RAW/task_arch.c         |  20 +--
 Robust/src/Runtime/mem.c                   |  14 +-
 Robust/src/Runtime/multicoregarbage.c      | 143 +++++++++++----------
 Robust/src/Runtime/multicoreruntime.h      |  78 ++++++-----
 Robust/src/Runtime/multicoretask.c         | 139 ++++++++++----------
 Robust/src/Runtime/runtime.h               |   2 +-
 Robust/src/buildscript                     |  84 ++++++++----
 9 files changed, 265 insertions(+), 232 deletions(-)
diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java
index 94380ba1..89bc6530 100644
--- a/Robust/src/IR/Flat/BuildCodeMultiCore.java
+++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java
@@ -676,9 +676,9 @@ public class BuildCodeMultiCore extends BuildCode {
 		output.println("BAMBOO_DEBUGPRINT(0xAAAA);");
 		output.println("BAMBOO_DEBUGPRINT_REG(tmpsum);"); 
 	} else {
-		output.println("BAMBOO_START_CRITICAL_SECTION();");
+		//output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();");
 		output.println("tprintf(\"Process %x(%d): task %s\\n\", corenum, corenum, \"" + task.getSymbol() + "\");");
-		output.println("BAMBOO_CLOSE_CRITICAL_SECTION();");
+		//output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();");
 	}
 	//output.println("BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME());");
     output.println("#endif");
@@ -687,9 +687,9 @@ public class BuildCodeMultiCore extends BuildCode {
 		output.println("BAMBOO_DEBUGPRINT(0xAAAA);");
 		output.println("BAMBOO_DEBUGPRINT_REG(tmpsum);");
 	} else {
-		output.println("BAMBOO_START_CRITICAL_SECTION();");
+		//output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();");
 		output.println("tprintf(\"Process %x(%d): task %s\\n\", corenum, corenum, \"" + task.getSymbol() + "\");");
-		output.println("BAMBOO_CLOSE_CRITICAL_SECTION();");
+		//output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();");
 	}
     output.println("#endif");
 	if(this.state.RAW) {
@@ -728,7 +728,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	if (current_node.kind()!=FKind.FlatReturnNode) {
 	  //output.println("   flushAll();");
 	  output.println("#ifdef CACHEFLUSH");
-	  output.println("BAMBOO_START_CRITICAL_SECTION();");
+	  output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();");
 	  output.println("#ifdef DEBUG");
 	  output.println("BAMBOO_DEBUGPRINT(0xec00);");
 	  output.println("#endif");
@@ -736,7 +736,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	  output.println("#ifdef DEBUG");
 	  output.println("BAMBOO_DEBUGPRINT(0xecff);");
 	  output.println("#endif");
-	  output.println("BAMBOO_CLOSE_CRITICAL_SECTION();");
+	  output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();");
 	  output.println("#endif");
 	  outputTransCode(output);
 	  output.println("   return;");
@@ -1619,7 +1619,7 @@ public class BuildCodeMultiCore extends BuildCode {
     } else {
       if(fm.getTask() != null) {
 	output.println("#ifdef CACHEFLUSH");
-	output.println("BAMBOO_START_CRITICAL_SECTION();");
+	output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();");
 	output.println("#ifdef DEBUG");
 	output.println("BAMBOO_DEBUGPRINT(0xec00);");
 	output.println("#endif");
@@ -1627,7 +1627,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	output.println("#ifdef DEBUG");
 	output.println("BAMBOO_DEBUGPRINT(0xecff);");
 	output.println("#endif");
-	output.println("BAMBOO_CLOSE_CRITICAL_SECTION();");
+	output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();");
 	output.println("#endif");
 	outputTransCode(output);
       }
diff --git a/Robust/src/Runtime/RAW/runtime_arch.h b/Robust/src/Runtime/RAW/runtime_arch.h
index 6e33e5cf..19210e6e 100644
--- a/Robust/src/Runtime/RAW/runtime_arch.h
+++ b/Robust/src/Runtime/RAW/runtime_arch.h
@@ -13,7 +13,6 @@
 #define BAMBOO_CACHE_LINE_SIZE (kCacheLineSize)
 #define BAMBOO_CACHE_LINE_MASK (kCacheLineMask)
 
-#define BAMBOO_TOTALCORE (raw_get_num_tiles())  // the total # of cores available in the processor
 #define BAMBOO_NUM_OF_CORE corenum   // the # of current residing core
 #define BAMBOO_GET_NUM_OF_CORE() (raw_get_abs_pos_x() + raw_get_array_size_x() * raw_get_abs_pos_y())  // compute the # of current residing core
 #define BAMBOO_DEBUGPRINT(x) (raw_test_pass((x)))
diff --git a/Robust/src/Runtime/RAW/task_arch.c b/Robust/src/Runtime/RAW/task_arch.c
index 40c42655..4a247a3b 100644
--- a/Robust/src/Runtime/RAW/task_arch.c
+++ b/Robust/src/Runtime/RAW/task_arch.c
@@ -790,7 +790,7 @@ bool getreadlock(void * ptr) {
   } else {
 	lock2require = (int)(((struct ___Object___ *)ptr)->lock);
   }
-  targetcore = (lock2require >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (lock2require >> 5) % NUMCORES;
   lockflag = false;
 #ifndef INTERRUPT
   reside = false;
@@ -849,7 +849,7 @@ void releasereadlock(void * ptr) {
   } else {
 	reallock = (int)(((struct ___Object___ *)ptr)->lock);
   }
-  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (reallock >> 5) % NUMCORES;
 
   if(targetcore == BAMBOO_NUM_OF_CORE) {
 	BAMBOO_START_CRITICAL_SECTION_LOCK();
@@ -892,7 +892,7 @@ bool getreadlock_I_r(void * ptr, void * redirectlock, int core, bool cache) {
 #endif
 	  lockresult = 0;
   }  
-  targetcore = ((int)redirectlock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = ((int)redirectlock >> 5) % NUMCORES;
   
   if(targetcore == BAMBOO_NUM_OF_CORE) {
     // reside on this core
@@ -957,7 +957,7 @@ bool getwritelock(void * ptr) {
   } else {
 	lock2require = (int)(((struct ___Object___ *)ptr)->lock);
   }
-  targetcore = (lock2require >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (lock2require >> 5) % NUMCORES;
   lockflag = false;
 #ifndef INTERRUPT
   reside = false;
@@ -1024,7 +1024,7 @@ void releasewritelock(void * ptr) {
   } else {
 	reallock = (int)(((struct ___Object___ *)ptr)->lock);
   }
-  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (reallock >> 5) % NUMCORES;
 
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xe661);
@@ -1069,7 +1069,7 @@ bool getwritelock_I(void * ptr) {
   } else {
 	lock2require = (int)(((struct ___Object___ *)ptr)->lock);
   }
-  targetcore = (lock2require >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (lock2require >> 5) % NUMCORES;
   lockflag = false;
 #ifndef INTERRUPT
   reside = false;
@@ -1134,7 +1134,7 @@ bool getwritelock_I_r(void * ptr, void * redirectlock, int core, bool cache) {
 #endif
 	  lockresult = 0;
   }
-  targetcore = ((int)redirectlock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = ((int)redirectlock >> 5) % NUMCORES;
 
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xe571);
@@ -1204,7 +1204,7 @@ void releasewritelock_I(void * ptr) {
   } else {
 	reallock = (int)(((struct ___Object___ *)ptr)->lock);
   }
-  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (reallock >> 5) % NUMCORES;
 
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xe681);
@@ -1236,7 +1236,7 @@ void releasewritelock_I(void * ptr) {
 void releasewritelock_I_r(void * lock, void * redirectlock) {
   int targetcore = 0;
   int reallock = (int)lock;
-  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (reallock >> 5) % NUMCORES;
 
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xe691);
@@ -1282,7 +1282,7 @@ void releasewritelock_I_r(void * lock, void * redirectlock) {
 //            1: the lock request is denied
 __attribute__((always_inline)) int processlockrequest(int locktype, int lock, int obj, int requestcore, int rootrequestcore, bool cache) {
   int deny = 0;
-  if( ((lock >> 5) % BAMBOO_TOTALCORE) != BAMBOO_NUM_OF_CORE ) {
+  if( ((lock >> 5) % NUMCORES) != BAMBOO_NUM_OF_CORE ) {
 	  // the lock should not be on this core
 #ifndef TILERA
 	  BAMBOO_DEBUGPRINT_REG(requestcore);
diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c
index 9cc3da92..1df82bae 100644
--- a/Robust/src/Runtime/mem.c
+++ b/Robust/src/Runtime/mem.c
@@ -8,12 +8,12 @@ void * mycalloc(int m,
 		            int size) {
   void * p = NULL;
   int isize = size; 
-  BAMBOO_START_CRITICAL_SECTION_MEM();
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
 	  BAMBOO_EXIT(0xc001);
   }
-  BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
   return p;
 }
 
@@ -25,7 +25,7 @@ void * mycalloc_share(struct garbagelist * stackptr,
   int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
 	bool hasgc = false;
 memalloc:
-  BAMBOO_START_CRITICAL_SECTION_MEM();
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
 	tprintf("ask for shared mem: %x \n", isize);
 #endif
@@ -35,7 +35,7 @@ memalloc:
 #endif
   if(p == NULL) {
 		// no more global shared memory
-		BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		if(!hasgc) {
 			// start gc
 			gc(stackptr);
@@ -48,7 +48,7 @@ memalloc:
 		// try to malloc again
 		goto memalloc;
   }
-  BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 	void * alignedp = 
 		(void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
 	BAMBOO_MEMSET_WH(p, -2, (alignedp - p));
@@ -60,13 +60,13 @@ void * mycalloc_share(int m,
 		                  int size) {
   void * p = NULL;
   int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
-  BAMBOO_START_CRITICAL_SECTION_MEM();
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
   p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
   if(p == NULL) {
 		// no more global shared memory
 		BAMBOO_EXIT(0xc003);
   }
-  BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
   return 
 		(void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
 }
diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c
index 3ffe8e54..c7314cd2 100644
--- a/Robust/src/Runtime/multicoregarbage.c
+++ b/Robust/src/Runtime/multicoregarbage.c
@@ -106,8 +106,7 @@ inline void dumpSMem() {
 				coren = gc_block2core[block%(NUMCORES4GC*2)];
 			}
 			// compute core coordinate
-			x = bamboo_cpu2coords[coren*2]; 
-			y = bamboo_cpu2coords[coren*2+1];
+			BAMBOO_COORDS(coren, &x, &y); 
 			tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", 
 					    block, sblock++, x, y, 
 							(sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
@@ -416,7 +415,7 @@ inline void checkMarkStatue() {
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xee02);
 #endif
-		BAMBOO_START_CRITICAL_SECTION_STATUS();  
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 		gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 		gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
 		gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
@@ -440,10 +439,11 @@ inline void checkMarkStatue() {
 				gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
 				waitconfirm = true;
 				numconfirm = NUMCORESACTIVE - 1;
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				for(i = 1; i < NUMCORESACTIVE; ++i) {	
 					gccorestatus[i] = 1;
 					// send mark phase finish confirm request msg to core i
-					send_msg_1(i, GCMARKCONFIRM, true);
+					send_msg_1(i, GCMARKCONFIRM);
 				} // for(i = 1; i < NUMCORESACTIVE; ++i) 
 			} else {
 				// check if the sum of send objs and receive obj are the same
@@ -481,9 +481,11 @@ inline void checkMarkStatue() {
 					}
 					waitconfirm = false;
 				}// if(0 == sumsendobj) else ...
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 			} // if(!gcwaitconfirm) else()
-		} // if(allStall)
-		BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+		} else {
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+		}  // if(allStall)
 	} // if((!waitconfirm)...
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT(0xee0a);
@@ -506,7 +508,7 @@ inline bool preGC() {
 		for(i = 1; i < NUMCORESACTIVE; ++i) {	
 			corestatus[i] = 1;
 			// send status confirm msg to core i
-			send_msg_1(i, STATUSCONFIRM, false);
+			send_msg_1(i, STATUSCONFIRM);
 		} // for(i = 1; i < NUMCORESACTIVE; ++i)
 
 #ifdef DEBUG
@@ -960,11 +962,11 @@ inline void moveLObjs() {
 				gcheaptop += size;
 				// cache the mapping info anyway
 				//if(ptr != tmpheaptop) {
-				BAMBOO_START_CRITICAL_SECTION();
+				BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 				//mgchashInsert_I(ptr, tmpheaptop);
 				RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
 				//MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				//}
 #ifdef DEBUG
 				BAMBOO_DEBUGPRINT(0xcdca);
@@ -973,7 +975,7 @@ inline void moveLObjs() {
 #endif
 				if(host != BAMBOO_NUM_OF_CORE) {
 					// send the original host core with the mapping info
-					send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
+					send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
 #ifdef DEBUG
 					BAMBOO_DEBUGPRINT(0xcdcb);
 					BAMBOO_DEBUGPRINT_REG(ptr);
@@ -1044,11 +1046,11 @@ inline void moveLObjs() {
 				cpysize += isize;
 				// cache the mapping info anyway
 				//if(ptr != tmpheaptop) {
-				BAMBOO_START_CRITICAL_SECTION();
+				BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 				//mgchashInsert_I(ptr, tmpheaptop);
 				RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
 				//MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				//}
 #ifdef DEBUG
 				BAMBOO_DEBUGPRINT(0xcdcc);
@@ -1058,7 +1060,7 @@ inline void moveLObjs() {
 #endif
 				if(host != BAMBOO_NUM_OF_CORE) {
 					// send the original host core with the mapping info
-					send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
+					send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
 #ifdef DEBUG
 					BAMBOO_DEBUGPRINT(0xcdcd);
 					BAMBOO_DEBUGPRINT_REG(ptr);
@@ -1114,14 +1116,14 @@ inline void markObj(void * objptr) {
 		int host = hostcore(objptr);
 		if(BAMBOO_NUM_OF_CORE == host) {
 			// on this core
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			if(((int *)objptr)[6] == INIT) {
 				// this is the first time that this object is discovered,
 				// set the flag as DISCOVERED
 				((int *)objptr)[6] = DISCOVERED;
 				gc_enqueue_I(objptr);  
 			}
-			BAMBOO_CLOSE_CRITICAL_SECTION();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		} else {
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT(0xbbbb);
@@ -1134,7 +1136,7 @@ inline void markObj(void * objptr) {
 				unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
 				// send a msg to host informing that objptr is active
-				send_msg_2(host, GCMARKEDOBJ, objptr, false);
+				send_msg_2(host, GCMARKEDOBJ, objptr);
 #ifdef GC_PROFILE
 				marktime += BAMBOO_GET_EXE_TIME() - ttime;
 				num_markrequest++;
@@ -1144,9 +1146,9 @@ inline void markObj(void * objptr) {
 			}
 		}
 	} else {
-		BAMBOO_START_CRITICAL_SECTION();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 		gc_enqueue_I(objptr);
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 	} // if(ISSHAREDOBJ(objptr))
 } // void markObj(void * objptr) 
 
@@ -1289,9 +1291,9 @@ inline void mark(bool isfirst,
 		if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04); 
 #endif
 		while(true) {
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			bool hasItems = gc_moreItems2_I();
-			BAMBOO_CLOSE_CRITICAL_SECTION();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG 
 			BAMBOO_DEBUGPRINT(0xed05); 
 #endif
@@ -1323,10 +1325,10 @@ inline void mark(bool isfirst,
 						BAMBOO_DEBUGPRINT_REG(ptr);
 						BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
 #endif
-						BAMBOO_START_CRITICAL_SECTION();
+						BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 						gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
 						gcnumlobjs++;
-						BAMBOO_CLOSE_CRITICAL_SECTION();
+						BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 						// mark this obj
 						((int *)ptr)[6] = MARKED;
 					} else if(isnotmarked) {
@@ -1361,7 +1363,7 @@ inline void mark(bool isfirst,
 					// check if this obj has been forwarded
 					if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
 						// send a msg to host informing that ptr is active
-						send_msg_2(host, GCMARKEDOBJ, ptr, false);
+						send_msg_2(host, GCMARKEDOBJ, ptr);
 						gcself_numsendobjs++;
 						MGCHashadd(gcforwardobjtbl, (int)ptr);
 					}
@@ -1419,7 +1421,7 @@ inline void mark(bool isfirst,
 				BAMBOO_DEBUGPRINT(0xed09);
 #endif
 				send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
-									 gcself_numsendobjs, gcself_numreceiveobjs, false);
+									 gcself_numsendobjs, gcself_numreceiveobjs);
 				sendStall = true;
 			}
 		} // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
@@ -1448,7 +1450,7 @@ inline void compact2Heaptophelper_I(int coren,
 		gcdstcore = gctopcore;
 		gcblock2fill = *numblocks + 1;
 	} else {
-		send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, true); 
+		send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1); 
 	}
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT_REG(coren);
@@ -1508,7 +1510,7 @@ inline void compact2Heaptop() {
 		((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
 	 :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
 	// check if the top core finishes
-	BAMBOO_START_CRITICAL_SECTION();
+	BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 	if(gccorestatus[gctopcore] != 0) {
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xd101);
@@ -1516,10 +1518,10 @@ inline void compact2Heaptop() {
 #endif
 		// let the top core finishes its own work first
 		compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		return;
 	}
-	BAMBOO_CLOSE_CRITICAL_SECTION();
+	BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT(0xd102);
@@ -1529,7 +1531,7 @@ inline void compact2Heaptop() {
 	BAMBOO_DEBUGPRINT_REG(remain);
 #endif
 	for(int i = 0; i < NUMCORES4GC; i++) {
-		BAMBOO_START_CRITICAL_SECTION();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 		if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT(0xd103);
@@ -1540,12 +1542,12 @@ inline void compact2Heaptop() {
 				BAMBOO_DEBUGPRINT(0xd101);
 				BAMBOO_DEBUGPRINT_REG(gctopcore);
 #endif
-			  BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				// the top core is not free now
 				return;
 			}
 		} // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 	} // for(i = 0; i < NUMCORES4GC; i++)
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT(0xd106);
@@ -1604,12 +1606,12 @@ inline void resolvePendingMoveRequest() {
 			// find match
 			int tomove = 0;
 			int startaddr = 0;
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore, 
 					                                       gcrequiredmems[dstcore], 
 																							   &tomove, 
 																							   &startaddr);
-			BAMBOO_CLOSE_CRITICAL_SECTION();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT(0xeb02);
 			BAMBOO_DEBUGPRINT_REG(sourcecore);
@@ -1630,7 +1632,7 @@ inline void resolvePendingMoveRequest() {
 				BAMBOO_DEBUGPRINT(0xeb04);
 #endif
 				send_msg_4(dstcore, GCMOVESTART, sourcecore, 
-						startaddr, tomove, false);
+						startaddr, tomove);
 			}
 			gcmovepending--;
 			nosparemem = true;
@@ -1911,11 +1913,11 @@ innermoveobj:
 			BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
 		}
 		// store mapping info
-		BAMBOO_START_CRITICAL_SECTION();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 		//mgchashInsert_I(orig->ptr, to->ptr);
 		RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr); 
 		//MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 	  //}
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xcdce);
@@ -2061,7 +2063,7 @@ innercompact:
 #endif
 			// ask for more mem
 			gctomove = false;
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
 						              gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
 #ifdef DEBUG
@@ -2069,13 +2071,13 @@ innercompact:
 #endif
 				gctomove = true;
 			} else {
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 				BAMBOO_DEBUGPRINT(0xe105);
 #endif
 				return false; 
 			}
-			BAMBOO_CLOSE_CRITICAL_SECTION();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		} else {
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT(0xe106);
@@ -2092,7 +2094,7 @@ innercompact:
 			// ask for more mem
 			gctomove = false;
 			send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
-					       *filledblocks, *heaptopptr, gccurr_heaptop, false);
+					       *filledblocks, *heaptopptr, gccurr_heaptop);
 		} else {
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT(0xe108);
@@ -2100,7 +2102,7 @@ innercompact:
 #endif
 			// finish compacting
 			send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-					       *filledblocks, *heaptopptr, 0, false);
+					       *filledblocks, *heaptopptr, 0);
 		}
 	} // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
 
@@ -2164,7 +2166,7 @@ inline void compact() {
 		BAMBOO_DEBUGPRINT_REG(to->base);
 #endif
 		send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-				       0, to->base, 0, false);
+				       0, to->base, 0);
 		RUNFREE(orig);
 		RUNFREE(to);
 		return;
@@ -2197,7 +2199,7 @@ inline void * flushObj(void * objptr) {
 		BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
 		// a shared obj ptr, change to new address
-		BAMBOO_START_CRITICAL_SECTION();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef GC_PROFILE
 		unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
@@ -2207,7 +2209,7 @@ inline void * flushObj(void * objptr) {
 		flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
 #endif
 		//MGCHashget(gcpointertbl, objptr, &dstptr);
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT_REG(dstptr);
 #endif
@@ -2240,7 +2242,7 @@ inline void * flushObj(void * objptr) {
 				// the first time require the mapping, send msg to the hostcore 
 				// for the mapping info
 				send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
-									 BAMBOO_NUM_OF_CORE, false);
+									 BAMBOO_NUM_OF_CORE);
 				while(true) {
 					if(gcismapped) {
 						break;
@@ -2253,11 +2255,11 @@ inline void * flushObj(void * objptr) {
 				// TODO
 				//flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
 #endif
-				BAMBOO_START_CRITICAL_SECTION();
+				BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 				//dstptr = mgchashSearch(objptr);
 				RuntimeHashget(gcpointertbl, objptr, &dstptr);
 				//MGCHashget(gcpointertbl, objptr, &dstptr);
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 			} // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT_REG(dstptr);
@@ -2388,9 +2390,9 @@ inline void flush(struct garbagelist * stackptr) {
 #endif
 
 	while(true) {
-		BAMBOO_START_CRITICAL_SECTION();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 		bool hasItems = gc_moreItems_I();
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		if(!hasItems) {
 			break;
 		}
@@ -2398,9 +2400,9 @@ inline void flush(struct garbagelist * stackptr) {
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xe301);
 #endif
-		BAMBOO_START_CRITICAL_SECTION();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 		void * ptr = gc_dequeue_I();
-		BAMBOO_CLOSE_CRITICAL_SECTION();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		if(ISSHAREDOBJ(ptr)) {
 			// should be a local shared obj and should have mapping info
 			ptr = flushObj(ptr);
@@ -2479,7 +2481,6 @@ inline void flush(struct garbagelist * stackptr) {
 			}
 		} // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
 	} // while(gc_moreItems())
-	BAMBOO_CLOSE_CRITICAL_SECTION();
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT(0xe308);
 #endif
@@ -2580,7 +2581,7 @@ inline void flush(struct garbagelist * stackptr) {
 	if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 		gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 	} else {
-		send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
+		send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
 	}
 #ifdef GC_PROFILE
 	if(BAMBOO_NUM_OF_CORE == 0) {
@@ -2609,7 +2610,7 @@ inline void gc_collect(struct garbagelist * stackptr) {
 #endif
 	initGC();
 	//send init finish msg to core coordinator
-	send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
+	send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
 	while(true) {
 		if(MARKPHASE == gcphase) {
 			break;
@@ -2660,7 +2661,7 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
 #endif
 	initGC();
 	//send init finish msg to core coordinator
-	send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
+	send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
 	while(true) {
 		if(MARKPHASE == gcphase) {
 			break;
@@ -2733,7 +2734,7 @@ inline void gc(struct garbagelist * stackptr) {
 		// Note: all cores need to init gc including non-gc cores
 		for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; i++) {
 			// send GC init messages to all cores
-			send_msg_1(i, GCSTARTINIT, false);
+			send_msg_1(i, GCSTARTINIT);
 		}
 		bool isfirst = true;
 		bool allStall = false;
@@ -2744,12 +2745,12 @@ inline void gc(struct garbagelist * stackptr) {
 
 		gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 		while(true) {
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			if(gc_checkAllCoreStatus_I()) {
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				break;
 			}
-			BAMBOO_CLOSE_CRITICAL_SECTION();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		}
 #ifdef GC_PROFILE
 		gc_profileItem();
@@ -2764,7 +2765,7 @@ inline void gc(struct garbagelist * stackptr) {
 		for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) {
 			gccorestatus[i] = 1;
 			// send GC start messages to all cores
-			send_msg_1(i, GCSTART, false);
+			send_msg_1(i, GCSTART);
 		}
 
 		gcphase = MARKPHASE;
@@ -2782,7 +2783,7 @@ inline void gc(struct garbagelist * stackptr) {
 		// Note: only need to ask gc cores, non-gc cores do not host any objs
 		numconfirm = NUMCORES4GC - 1;
 		for(i = 1; i < NUMCORES4GC; ++i) {
-			send_msg_1(i, GCLOBJREQUEST, false);
+			send_msg_1(i, GCLOBJREQUEST);
 		}
 		gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 		while(true) {
@@ -2838,14 +2839,14 @@ inline void gc(struct garbagelist * stackptr) {
 			if (tmpcoreptr < tmpheaptop/*tmptopptr*/) {
 				gcstopblock[i] = numpbc + 1;
 				if(i != STARTUPCORE) {
-					send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false); 
+					send_msg_2(i, GCSTARTCOMPACT, numpbc+1); 
 				} else {
 					gcblock2fill = numpbc+1;
 				} // if(i != STARTUPCORE)
 			} else {
 				gcstopblock[i] = numpbc;
 				if(i != STARTUPCORE) {
-					send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
+					send_msg_2(i, GCSTARTCOMPACT, numpbc);
 				} else {
 					gcblock2fill = numpbc;
 				} // if(i != STARTUPCORE)
@@ -2897,17 +2898,17 @@ inline void gc(struct garbagelist * stackptr) {
 #endif
 			}
 
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			if(gc_checkCoreStatus_I()) {
 				// all cores have finished compacting
 				// restore the gcstatus of all cores
 				for(i = 0; i < NUMCORES4GC; ++i) {
 					gccorestatus[i] = 1;
 				}
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				break;
 			} else {
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				// check if there are spare mem for pending move requires
 				if(COMPACTPHASE == gcphase) {
 #ifdef DEBUG
@@ -2982,7 +2983,7 @@ inline void gc(struct garbagelist * stackptr) {
 		for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) {
 			// send start flush messages to all cores
 			gccorestatus[i] = 1;
-			send_msg_1(i, GCSTARTFLUSH, false);
+			send_msg_1(i, GCSTARTFLUSH);
 		}
 #ifdef GC_PROFILE
 		gc_profileItem();
@@ -2995,12 +2996,12 @@ inline void gc(struct garbagelist * stackptr) {
 		gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 		while(FLUSHPHASE == gcphase) {
 			// check the status of all cores
-			BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 			if(gc_checkAllCoreStatus_I()) {
-				BAMBOO_CLOSE_CRITICAL_SECTION();
+				BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 				break;
 			}
-			BAMBOO_CLOSE_CRITICAL_SECTION();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 		} // while(FLUSHPHASE == gcphase)
 		gcphase = FINISHPHASE;
 
@@ -3019,7 +3020,7 @@ inline void gc(struct garbagelist * stackptr) {
 		gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
 		for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) {
 			// send gc finish messages to all cores
-			send_msg_1(i, GCFINISH, false);
+			send_msg_1(i, GCFINISH);
 			gccorestatus[i] = 1;
 		}
 #ifdef RAWPATH // TODO GC_DEBUG
diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h
index 63b324a9..dfbd9e88 100644
--- a/Robust/src/Runtime/multicoreruntime.h
+++ b/Robust/src/Runtime/multicoreruntime.h
@@ -62,6 +62,8 @@ volatile bool isMsgSending;
 	outmsgdata[outmsglast] = (n); \
   OUTMSG_LASTINDEXINC(); 
 
+#define MAX_PACKET_WORDS 5
+
 /* Message format:
  *      type + Msgbody
  * type: 1 -- transfer object
@@ -401,38 +403,36 @@ INLINE void processlockrelease(int locktype,
 // msg related functions
 INLINE void send_hanging_msg();
 INLINE void send_msg_1(int targetcore, 
-		                   unsigned long n0,
-											 bool isinterrupton);
+		                   unsigned long n0);
 INLINE void send_msg_2(int targetcore, 
 		                   unsigned long n0, 
-											 unsigned long n1,
-											 bool isinterrupton);
+											 unsigned long n1);
 INLINE void send_msg_3(int targetcore, 
 		                   unsigned long n0, 
 											 unsigned long n1, 
-											 unsigned long n2,
-											 bool isinterrupton);
+											 unsigned long n2);
 INLINE void send_msg_4(int targetcore, 
 		                   unsigned long n0, 
 											 unsigned long n1, 
 											 unsigned long n2, 
-											 unsigned long n3,
-											 bool isinterrupton);
+											 unsigned long n3);
 INLINE void send_msg_5(int targetcore, 
 		                   unsigned long n0, 
 											 unsigned long n1, 
 											 unsigned long n2, 
 											 unsigned long n3, 
-											 unsigned long n4,
-											 bool isinterrupton);
+											 unsigned long n4);
 INLINE void send_msg_6(int targetcore, 
 		                   unsigned long n0, 
 											 unsigned long n1, 
 											 unsigned long n2, 
 											 unsigned long n3, 
 											 unsigned long n4, 
-											 unsigned long n5,
-											 bool isinterrupton);
+											 unsigned long n5);
+INLINE void send_msg_3_I(int targetcore, 
+  		                   unsigned long n0, 
+	  										 unsigned long n1, 
+		  									 unsigned long n2);
 INLINE void cache_msg_1(int targetcore, 
 												unsigned long n0);
 INLINE void cache_msg_2(int targetcore, 
@@ -461,7 +461,7 @@ INLINE void cache_msg_6(int targetcore,
 												unsigned long n4, 
 												unsigned long n5);
 INLINE void transferObject(struct transObjInfo * transObj);
-INLINE int receiveMsg(void);
+INLINE int receiveMsg(uint32_t send_port_pending);
 
 #ifdef MULTICORE_GC
 INLINE void transferMarkResults();
@@ -477,53 +477,47 @@ void outputProfileData();
 /////////////////////////////////////////////////////////////////////////////
 // For each version of BAMBOO runtime, there should be a header file named //
 // runtim_arch.h defining following MARCOS:                                //
-// BAMBOO_TOTALCORE: the total # of cores in the processor                 //
 // BAMBOO_NUM_OF_CORE: the # of current residing core                      //
 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core        //
+// BAMBOO_COORDS(c, x, y): convert the cpu # to coords (*x, *y)            //
 // BAMBOO_DEBUGPRINT(x): print out integer x                               //
 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                 //
+// BAMBOO_EXIT(x): exit routine                                            //
+// BAMBOO_DIE(x): error exit routine                                       //
+// BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number        //
+// BAMBOO_MSG_AVAIL(): checking if there are msgs coming in                //
+// BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in            //
+// BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(): change to runtime mode from    //
+//                                          client mode                    //
+// BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(): change to client mode from     //
+//                                          runtime mode                   //
+// BAMBOO_ENTER_SEND_MODE_FROM_CLIENT(): change to send mode from          //
+//                                       client mode                       //
+// BAMBOO_ENTER_CLIENT_MODE_FROM_SEND(): change to client mode from        //
+//                                       send mode                         //
+// BAMBOO_ENTER_RUNTIME_MODE_FROM_SEND(): change to runtime mode from      //
+//                                        send mode                        //
+// BAMBOO_ENTER_SEND_MODE_FROM_RUNTIME(): change to send mode from         //
+//                                        runtime mode                     //
+// BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock      //
+//                            request response                             //
 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of  //
 //                                whose size in bytes is y on local memory //
 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory         //
-// BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of  //
+// BAMBOO_LOCAL_MEM_CLOSE(): close the local heap                          //
+// BAMBOO_SHARE_MEM_CALLOC_I(x, y): allocate an array of x elements each of//
 //                                whose size in bytes is y on shared memory//
-// BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                               //
-// BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data        //
-//                                            structures related to obj    //
-//                                            queue                        //
-// BAMBOO_START_CRITICAL_SECTION_STATUS()                                  //
-// BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
-//                                         related to status data          //
-// BAMBOO_START_CRITICAL_SECTION_MSG()                                     //
-// BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures   //
-//                                      related to msg data                //
-// BAMBOO_START_CRITICAL_SECTION_LOCK()                                    //
-// BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures  //
-//                                       related to lock table             //
-// BAMBOO_START_CRITICAL_SECTION_MEM()                                     //
-// BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory        //
-// BAMBOO_START_CRITICAL_SECTION()                                         //
-// BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures   //
-// BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock      //
-//                            request response                             //
+// BAMBOO_SHARE_MEM_CLOSE(): close the shared heap                        //
 // BAMBOO_CACHE_LINE_SIZE: the cache line size                             //
 // BAMBOO_CACHE_LINE_MASK: mask for a cache line                           //
 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with     //
 //                                 length y                                //
 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary  //
-// BAMBOO_EXIT(x): exit routine                                            //
-// BAMBOO_MSG_AVAIL(): checking if there are msgs coming in                //
-// BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in            //
-// BAMBOO_GET_EXE_TIME(): rountine to get current clock cycle number       //
 // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start //
 //                            address x, size z) to value y with write     //
 //                            hint, the processor will not fetch the       //
 //                            current content of the memory and directly   //
 //                            write                                        //
-//                                                                         //
-// runtime_arch.h should also define following global parameters:          //
-// bamboo_cpu2coords: map the cpu # to (x,y) coordinates                   //
-// bamboo_coords2cpu: map the (x,y) coordinates to cpu #                   //
 /////////////////////////////////////////////////////////////////////////////
 
 #endif  // #ifdef MULTICORE
diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c
index cfe56c73..b0c92326 100644
--- a/Robust/src/Runtime/multicoretask.c
+++ b/Robust/src/Runtime/multicoretask.c
@@ -195,6 +195,8 @@ void disruntimedata() {
 		RUNFREE(currtpd);
 		currtpd = NULL;
 	}
+	BAMBOO_LOCAL_MEM_CLOSE();
+	BAMBOO_SHARE_MEM_CLOSE();
 }
 
 inline __attribute__((always_inline))
@@ -215,7 +217,7 @@ bool checkObjQueue() {
 
 	while(!isEmpty(&objqueue)) {
 		void * obj = NULL;
-		BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xf001);
 #endif
@@ -235,7 +237,7 @@ bool checkObjQueue() {
 		grount = 0;
 		getwritelock_I(obj);
 		while(!lockflag) {
-			BAMBOO_WAITING_FOR_LOCK();
+			BAMBOO_WAITING_FOR_LOCK(0);
 		} // while(!lockflag)
 		grount = lockresult;
 #ifdef DEBUG
@@ -305,13 +307,13 @@ bool checkObjQueue() {
 			//isInterrupt = true;
 #endif
 objqueuebreak:
-			BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
+			BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 			BAMBOO_DEBUGPRINT(0xf000);
 #endif
 			break;
 		} // if(grount == 1)
-		BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xf000);
 #endif
@@ -342,7 +344,7 @@ void checkCoreStatus() {
 		BAMBOO_DEBUGPRINT(0xee04);
 		BAMBOO_DEBUGPRINT_REG(waitconfirm);
 #endif
-		BAMBOO_START_CRITICAL_SECTION_STATUS();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xf001);
 #endif
@@ -391,7 +393,7 @@ void checkCoreStatus() {
 					for(i = 1; i < NUMCORESACTIVE; ++i) {	
 						corestatus[i] = 1;
 						// send status confirm msg to core i
-						send_msg_1(i, STATUSCONFIRM, false);
+						send_msg_1(i, STATUSCONFIRM);
 					} // for(i = 1; i < NUMCORESACTIVE; ++i)
 					waitconfirm = true;
 					numconfirm = NUMCORESACTIVE - 1;
@@ -414,18 +416,18 @@ void checkCoreStatus() {
 					// profile mode, send msgs to other cores to request pouring
 					// out progiling data
 #ifdef PROFILE
-					BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+					BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 					BAMBOO_DEBUGPRINT(0xf000);
 #endif
 					for(i = 1; i < NUMCORESACTIVE; ++i) {
 						// send profile request msg to core i
-						send_msg_2(i, PROFILEOUTPUT, totalexetime, false);
+						send_msg_2(i, PROFILEOUTPUT, totalexetime);
 					} // for(i = 1; i < NUMCORESACTIVE; ++i)
 					// pour profiling data on startup core
 					outputProfileData();
 					while(true) {
-						BAMBOO_START_CRITICAL_SECTION_STATUS();
+						BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
 						BAMBOO_DEBUGPRINT(0xf001);
 #endif
@@ -446,7 +448,7 @@ void checkCoreStatus() {
 						}  // for(i = 0; i < NUMCORESACTIVE; ++i)
 						if(!allStall) {
 							int halt = 100;
-							BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+							BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 							BAMBOO_DEBUGPRINT(0xf000);
 #endif
@@ -484,7 +486,7 @@ void checkCoreStatus() {
 			waitconfirm = false;
 			numconfirm = 0;
 		} //  if(allStall)
-		BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 		BAMBOO_DEBUGPRINT(0xf000);
 #endif
@@ -608,7 +610,7 @@ inline void run(void * arg) {
 #endif
 							  // send stall msg
 								send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE, 
-										       self_numsendobjs, self_numreceiveobjs, false);
+										       self_numsendobjs, self_numreceiveobjs);
 							  sendStall = true;
 							  isfirst = true;
 							  busystatus = false;
@@ -1356,7 +1358,7 @@ void * globalmalloc_I(int coren,
 	if(foundsmem == 1) {
 		// find suitable block
 		mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC)?
-				(BAMBOO_SMEM_SIZE_L*tofindb):(BAMBOO_LARGE_SMEM_BOUND+
+				  (BAMBOO_SMEM_SIZE_L*tofindb):(BAMBOO_LARGE_SMEM_BOUND+
 					(tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
 		*allocsize = size;
 		// set bamboo_smemtbl
@@ -1522,6 +1524,7 @@ INLINE int checkMsgLength_I(int size) {
 		default: 
 		{
 			BAMBOO_DEBUGPRINT_REG(type);
+			BAMBOO_DEBUGPRINT_REG(msgdataindex);
 			int i = 6;
 			while(i-- > 0) {
 				BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]);
@@ -1649,13 +1652,13 @@ INLINE void processmsg_lockrequest_I() {
 		return;
 	} else {
 		// send response msg
-		// for 32 bit machine, the size is always 4 words
+		// for 32 bit machine, the size is always 4 words, cache the msg first
 		int tmp = deny==1?LOCKDENY:LOCKGROUNT;
-		if(isMsgSending) {
+		//if(isMsgSending) {
 			cache_msg_4(data4, tmp, locktype, data2, data3);
-		} else {
-			send_msg_4(data4, tmp, locktype, data2, data3, true);
-		}
+		/*} else {
+			send_msg_4(data4, tmp, locktype, data2, data3);
+		}*/
 	}
 }
 
@@ -1750,14 +1753,14 @@ INLINE void processmsg_redirectlock_I() {
 		return;
 	} else {
 		// send response msg
-		// for 32 bit machine, the size is always 4 words
-		if(isMsgSending) {
+		// for 32 bit machine, the size is always 4 words, cache the msg first
+		//if(isMsgSending) {
 			cache_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
 									data1, data2, data3);
-		} else {
+		/*} else {
 			send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
-								 data1, data2, data3, true);
-		}
+								 data1, data2, data3);
+		}*/
 	}
 }
 
@@ -1850,11 +1853,12 @@ INLINE void processmsg_profileoutput_I() {
 	totalexetime = msgdata[msgdataindex]; //[1]
 	MSG_INDEXINC_I();
 	outputProfileData();
-	if(isMsgSending) {
+	// cache the msg first
+	//if(isMsgSending) {
 		cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
-	} else {
-		send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE, true);
-	}
+	/*} else {
+		send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
+	}*/
 }
 
 INLINE void processmsg_profilefinish_I() {
@@ -1888,15 +1892,16 @@ INLINE void processmsg_statusconfirm_I() {
 		BAMBOO_DEBUGPRINT(0xe887);
 #endif
 #endif
-		if(isMsgSending) {
+		// cache the msg first
+		//if(isMsgSending) {
 			cache_msg_5(STARTUPCORE, STATUSREPORT, 
 									busystatus?1:0, BAMBOO_NUM_OF_CORE,
 									self_numsendobjs, self_numreceiveobjs);
-		} else {
+		/*} else {
 			send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, 
 								 BAMBOO_NUM_OF_CORE, self_numsendobjs, 
-								 self_numreceiveobjs, true);
-		}
+								 self_numreceiveobjs);
+		}*/
 	}
 }
 
@@ -1965,23 +1970,24 @@ INLINE void processmsg_memrequest_I() {
 		if(gcprocessing) {
 			// is currently doing gc, dump this msg
 			if(INITPHASE == gcphase) {
-				// if still in the initphase of gc, send a startinit msg again
-				if(isMsgSending) {
+				// if still in the initphase of gc, send a startinit msg again, 
+				// cache the msg first
+				//if(isMsgSending) {
 					cache_msg_1(data2, GCSTARTINIT);
-				} else {
-					send_msg_1(data2, GCSTARTINIT, true);
-				}
+				/*} else {
+					send_msg_1(data2, GCSTARTINIT);
+				}*/
 			}
 		} else { 
 #endif
 		mem = smemalloc_I(data2, data1, &allocsize);
 		if(mem != NULL) {
-			// send the start_va to request core
-			if(isMsgSending) {
+			// send the start_va to request core, cache the msg first
+			//if(isMsgSending) {
 				cache_msg_3(data2, MEMRESPONSE, mem, allocsize);
-			} else {
-				send_msg_3(data2, MEMRESPONSE, mem, allocsize, true);
-			} 
+			/*} else {
+				send_msg_3(data2, MEMRESPONSE, mem, allocsize);
+			}*/ 
 		} // if mem == NULL, the gcflag of the startup core has been set
 			// and the gc should be started later, then a GCSTARTINIT msg
 			// will be sent to the requesting core to notice it to start gc
@@ -2134,11 +2140,12 @@ INLINE void processmsg_gcfinishcompact_I() {
 			int tomove = 0;
 			int dstcore = 0;
 			if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) {
-				if(isMsgSending) {
+				// cache the msg first
+				//if(isMsgSending) {
 					cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
-			  } else {
-					send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true);
-				}
+			  /*} else {
+					send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
+				}*/
 			}
 		} else {
 			gccorestatus[cnum] = 0;
@@ -2170,16 +2177,16 @@ INLINE void processmsg_gcmarkconfirm_I() {
 		// wrong core to receive such msg
 		BAMBOO_EXIT(0xb005);
 	} else {
-		// send response msg
-		if(isMsgSending) {
+		// send response msg, cache the msg first
+		//if(isMsgSending) {
 			cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
 									gcbusystatus, gcself_numsendobjs, 
 									gcself_numreceiveobjs);
-		} else {
+		/*} else {
 			send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
 								 gcbusystatus, gcself_numsendobjs, 
-								 gcself_numreceiveobjs, true);
-		}
+								 gcself_numreceiveobjs);
+		}*/
 	}
 }
 
@@ -2265,15 +2272,15 @@ INLINE void processmsg_gcmaprequest_I() {
 		/*if(isMsgSending) {
 			cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
 		} else {
-			send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1], true);
+			send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
 		}*/
 	} else {
-		// send back the mapping info
-		if(isMsgSending) {
+		// send back the mapping info, cache the msg first
+		//if(isMsgSending) {
 			cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr);
-		} else {
-			send_msg_3(data2, GCMAPINFO, data1, (int)dstptr, true);
-		}
+		/*} else {
+			send_msg_3(data2, GCMAPINFO, data1, (int)dstptr);
+		}*/
 	}
 #ifdef GC_PROFILE
 	flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimei;
@@ -2366,17 +2373,17 @@ INLINE void processmsg_gclobjmapping_I() {
 //               3--received a lock Msg
 //               RAW version: -1 -- received nothing
 //                            otherwise -- received msg type
-int receiveObject() {
+int receiveObject(int send_port_pending) {
 msg:
 	// get the incoming msgs
-  if(receiveMsg() == -1) {
+  if(receiveMsg(send_port_pending) == -1) {
 	  return -1;
   }
 processmsg:
 	// processing received msgs
 	int size = 0;
 	MSG_REMAINSIZE_I(&size);
-  if(checkMsgLength_I(size) == -1) {
+  if((size == 0) || (checkMsgLength_I(size) == -1)) {
 		// not a whole msg
 		// have new coming msg
 		if(BAMBOO_MSG_AVAIL() != 0) {
@@ -2840,7 +2847,7 @@ int containstag(struct ___Object___ *ptr,
 void releasewritelock_r(void * lock, void * redirectlock) {
   int targetcore = 0;
   int reallock = (int)lock;
-  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
+  targetcore = (reallock >> 5) % NUMCORES;
 
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xe671);
@@ -2850,7 +2857,7 @@ void releasewritelock_r(void * lock, void * redirectlock) {
 #endif
 
   if(targetcore == BAMBOO_NUM_OF_CORE) {
-	BAMBOO_START_CRITICAL_SECTION_LOCK();
+		BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT(0xf001);
 #endif
@@ -2875,7 +2882,7 @@ void releasewritelock_r(void * lock, void * redirectlock) {
       BAMBOO_DEBUGPRINT_REG(lockvalue->value);
 #endif
     }
-	BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+		BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 	BAMBOO_DEBUGPRINT(0xf000);
 #endif
@@ -2884,7 +2891,7 @@ void releasewritelock_r(void * lock, void * redirectlock) {
 	  // send lock release with redirect info msg
 	  // for 32 bit machine, the size is always 4 words
 		send_msg_4(targetcore, REDIRECTRELEASE, 1, (int)lock, 
-				       (int)redirectlock, false);
+				       (int)redirectlock);
   }
 }
 #endif
@@ -2992,7 +2999,7 @@ newtask:
 		  BAMBOO_DEBUGPRINT_REG((int)(runtime_locks[i].value));
 #endif
 		  getwritelock(lock);
-		  BAMBOO_START_CRITICAL_SECTION();
+			BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
 		  BAMBOO_DEBUGPRINT(0xf001);
 #endif
@@ -3000,7 +3007,7 @@ newtask:
 		  //isInterrupt = false;
 #endif 
 		  while(!lockflag) { 
-			  BAMBOO_WAITING_FOR_LOCK();
+			  BAMBOO_WAITING_FOR_LOCK(0);
 		  }
 #ifndef INTERRUPT
 		  if(reside) {
@@ -3020,7 +3027,7 @@ newtask:
 #ifdef PROFILE
 		  //isInterrupt = true;
 #endif
-		  BAMBOO_CLOSE_CRITICAL_SECTION();
+		  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
 		  BAMBOO_DEBUGPRINT(0xf000);
 #endif
diff --git a/Robust/src/Runtime/runtime.h b/Robust/src/Runtime/runtime.h
index 37af2b7e..02ef5503 100644
--- a/Robust/src/Runtime/runtime.h
+++ b/Robust/src/Runtime/runtime.h
@@ -151,7 +151,7 @@ inline void run(void * arg);
 #ifdef MULTICORE_GC
 inline void setupsmemmode(void);
 #endif
-int receiveObject(void);
+int receiveObject(int send_port_pending);
 void flagorand(void * ptr, int ormask, int andmask, struct parameterwrapper ** queues, int length);
 void flagorandinit(void * ptr, int ormask, int andmask);
 void enqueueObject(void * ptr, struct parameterwrapper ** queues,int length);
diff --git a/Robust/src/buildscript b/Robust/src/buildscript
index 49dfc33e..c6621f58 100755
--- a/Robust/src/buildscript
+++ b/Robust/src/buildscript
@@ -22,6 +22,29 @@ echo -abortreaders abort readers immediately
 echo -trueprob double - probabiltiy of true branch
 echo -dsmcaching -enable caching in dsm runtime
 echo
+echo BAMBOO Multicore options
+echo -scheduling do task scheduling
+echo -multicore generate multi-core version binary
+echo "-numcore set the number of cores (should be used together with -multicore), defaultly set as 1"
+echo "-cacheflush enable cache flush in raw version binary (should be used together with -raw)"
+echo "-interrupt generate raw version binary with interruption (should be used together with -raw)"
+echo "-rawpath print out execute path information for raw version (should be used together with -raw)"
+echo "-useprofile use profiling data for scheduling (should be used together with -raw)"
+echo -printscheduling print out scheduling graphs
+echo -printschedulesim print out scheduling simulator result graphs
+echo -abcclose close the array boundary check
+echo "-tilera_bme generate tilera version binary for Bare Metal Environment (should be used together with -multicore)"
+echo "-tilera_zlinux generate tilera version binary for Zero-Overhead Linux (should be used together with -multicore)"
+echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera_bme or -tilera_zlinux)"
+echo "-raw generate raw version binary (should be used together with -multicore)"
+echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)"
+echo -threadsimulate generate multi-thread simulate version binary
+echo -multicoregc generate multi-core binary with garbage collection
+echo "-numcore4gc set the number of cores for gc (should be used together with -multicoregc), defaultly set as 0"
+echo -gcprofile build with gcprofile options
+echo -accurateprofile build with accurate profile information including pre/post task processing info
+echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version"
+echo
 echo Other options
 echo -builddir setup different build directory
 echo -robustroot set up the ROBUSTROOT to directory other than default one
@@ -40,23 +63,6 @@ echo -selfloop task - this task cannot self loop forever
 echo "-excprefetch methoddescriptor - exclude prefetches for this method (specified as class.method)"
 echo -taskstate do task state analysis
 echo -tagstate do tag state analysis
-echo -scheduling do task scheduling
-echo -multicore generate multi-core version binary
-echo "-numcore set the number of cores (should be used together with -multicore), defaultly set as 1"
-echo "-cacheflush enable cache flush in raw version binary (should be used togethere with -raw)"
-echo "-interrupt generate raw version binary with interruption (should be used togethere with -raw)"
-echo "-rawpath print out execute path information for raw version (should be used together with -raw)"
-echo "-useprofile use profiling data for scheduling (should be used together with -raw)"
-echo -printscheduling print out scheduling graphs
-echo -printschedulesim print out scheduling simulator result graphs
-echo -abcclose close the array boundary check
-echo "-tilera generate tilera version binary (should be used together with -multicore"
-echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera)"
-echo "-raw generate raw version binary (should be used together with -multicore)"
-echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)"
-echo -threadsimulate generate multi-thread simulate version binary
-echo -multicoregc generate multi-core binary with garbage collection
-echo "-numcore4gc set the number of cores for gc (should be used together with -multicoregc), defaultly set as 0"
 echo -optional enable optional
 echo -debug generate debug symbols
 echo -prefetch do prefetch analysis
@@ -72,9 +78,6 @@ echo -o binary
 echo -nojava do not run bristlecone compiler
 echo -instructionfailures inject code for instructionfailures
 echo -profile build with profile options
-echo -gcprofile build with gcprofile options
-echo -accurateprofile build with accurate profile information including pre/post task processing info
-echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version"
 echo "-enable-assertions execute assert statements during compilation"
 echo -justanalyze exit after compiler analyses complete
 echo "-distributioninfo  execute to collect distribution info for simulated annealing in multi-core version"
@@ -112,6 +115,8 @@ MLPDEBUG=false
 MULTICOREFLAG=false
 RAWFLAG=false
 TILERAFLAG=false
+TILERABMEFLAG=false
+TILERAZLINUXFLAG=false
 TILERACONFIG=''
 CACHEFLUSHFLAG=false
 RAWCONFIG=''
@@ -314,9 +319,14 @@ elif [[ $1 = '-raw' ]]
 then
 RAWFLAG=true
 JAVAOPTS="$JAVAOPTS -raw"
-elif [[ $1 = '-tilera' ]]
+elif [[ $1 = '-tilera_bme' ]]
+then
+TILERAFLAG=true
+TILERABMEFLAG=true
+elif [[ $1 = '-tilera_zlinux' ]]
 then
 TILERAFLAG=true
+TILERAZLINUXFLAG=true
 elif [[ $1 = '-tileraconfig' ]]
 then
 TILERACONFIG="$2"
@@ -644,9 +654,17 @@ make
 elif $TILERAFLAG
 then # TILERAFLAG
 TILERADIR="$CURDIR/tilera"
+if $TILERABMEFLAG
+then # TILERABMEFLAG
+TILERA_INDIR="BME"
 MAKEFILE="Makefile.tilera.$TILERACONFIG"
 SIMHVC="sim.hvc.$TILERACONFIG"
 PCIHVC="pci.hvc.$TILERACONFIG"
+elif $TILERAZLINUXFLAG
+then # TILERAZLINUXFLAG
+TILERA_INDIR="ZLINUX"
+MAKEFILE="Makefile.tilera.$TILERACONFIG"
+fi
 mkdir $TILERADIR
 cd $TILERADIR
 make clean
@@ -654,6 +672,14 @@ rm ./*
 
 export TILERACFLAGS="-DTASK -DMULTICORE -DCLOSE_PRINT -DTILERA"
 
+if $TILERABMEFLAG
+then # TILERABMEFLAG
+TILERACFLAGS="${TILERACFLAGS} -DTILERA_BME"
+elif $TILERAZLINUXFLAG
+then # TILERAZLINUXFLAG
+TILERACFLAGS="${TILERACFLAGS} -DTILERA_ZLINUX"
+fi
+
 if $CACHEFLUSHFLAG
 then # print path
 TILERACFLAGS="${TILERACFLAGS} -DCACHEFLUSH"
@@ -699,10 +725,13 @@ then # GC_PROFILE version
 TILERACFLAGS="${TILERACFLAGS} -DGC_PROFILE"
 fi
 
-cp $ROBUSTROOT/Tilera/Runtime/$MAKEFILE ./Makefile
-cp $ROBUSTROOT/Tilera/Runtime/$SIMHVC ./sim.hvc
-cp $ROBUSTROOT/Tilera/Runtime/$PCIHVC ./pci.hvc
-cp $ROBUSTROOT/Tilera/Runtime/bamboo-vmlinux-pci.hvc ./bamboo-vmlinux-pci.hvc
+cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$MAKEFILE ./Makefile
+if $TILERABMEFLAG
+then # TILERABMEFLAG
+cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$SIMHVC ./sim.hvc
+cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$PCIHVC ./pci.hvc
+cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/bamboo-vmlinux-pci.hvc ./bamboo-vmlinux-pci.hvc
+fi
 cp ../Runtime/multicoretask.c ./
 cp ../Runtime/multicoreruntime.c ./
 cp ../Runtime/Queue.c ./
@@ -730,13 +759,16 @@ cp ../Runtime/multicorehelper.h ./
 cp ../Runtime/MGCHash.h ./
 cp ../Tilera/Runtime/*.c ./
 cp ../Tilera/Runtime/*.h ./
+cp ../Tilera/Runtime/$TILERA_INDIR/*.c ./
+cp ../Tilera/Runtime/$TILERA_INDIR/*.h ./
+cp ../Tilera/Runtime/$TILERA_INDIR/*.S ./
 cp ../Tilera/lib/* ./
 cp ../$tmpbuilddirectory/*.c ./
 cp ../$tmpbuilddirectory/*.h ./
 
 make
 
-else #!RAWFLAG && !TILERAFLAG
+else #!RAWFLAG && !TILERABMEFLAG  && ! TILERAZLINUXFLAG
 cd $CURDIR 
 
 INCLUDES="$INCLUDES -I$ROBUSTROOT/Runtime -I. -IRuntime/include \
-- 
2.34.1