From: jihoonl <jihoonl>
Date: Fri, 21 May 2010 21:48:38 +0000 (+0000)
Subject: single failure
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=aeda07c16896c37b04b5943359247fbcc6e83363;p=IRC.git

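single failure

Summary of the changes in this patch (presumably in support of handling a
single machine failure, per the title):

dstmserver.c:
  - dstmInit: initialize leader_index to -1.
  - checkIfAnyMachineDead: only send the control byte and wait for the
    LIVE response on a socket while leader_index >= 0.
  - dstmAccept: in the case that prints "RESTART!!!", reset leader_index
    to -1 while holding recovery_mutex.
  - Remove DEBUG printf blocks from dstmAccept and a commented-out
    sleep(3) from inspectTransaction.

trans.c:
  - restoreDuplicationState: move the declaration of `flag` out of the
    #ifdef RECOVERYSTATS section so that it is always declared.
  - Remove DEBUG printf blocks from the send routine and from
    recv_data_errorcode.
  - reqClearNotifyList: comment out the Pmid/Bmid printf calls.
  - printRecoveryStat: drop the recovered-data printf and a duplicate
    fflush(stdout).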
---

diff --git a/Robust/src/Runtime/DSTM/interface_recovery/dstmserver.c b/Robust/src/Runtime/DSTM/interface_recovery/dstmserver.c
index ac5defb4..180f67b5 100644
--- a/Robust/src/Runtime/DSTM/interface_recovery/dstmserver.c
+++ b/Robust/src/Runtime/DSTM/interface_recovery/dstmserver.c
@@ -97,6 +97,7 @@ int dstmInit(void) {
   
   okCommit = TRANS_OK;
   currentEpoch = 1;
+  leader_index = -1;
 
 #endif
 
@@ -259,18 +260,20 @@ unsigned int checkIfAnyMachineDead(int* socklist)
       clearDeadThreadsNotification();
     }
     else {
-      send_data(socklist[i],&control,sizeof(char));
-
-      if(recv_data(socklist[i], &response, sizeof(char)) < 0) {
-        // if machine is dead, returns index of socket
-        return i;
-      }
-      else {
-        // machine responded
-        if(response != LIVE) {
+      if(leader_index >= 0 ) {
+        send_data(socklist[i],&control,sizeof(char));
+  
+        if(recv_data(socklist[i], &response, sizeof(char)) < 0) {
+          // if machine is dead, returns index of socket
           return i;
         }
-      } // end else
+        else {
+          // machine responded
+          if(response != LIVE) {
+            return i;
+          }
+        } // end else
+      }
     }
 
     sleep(numLiveHostsInSystem);  // wait for seconds for next checking
@@ -304,9 +307,6 @@ void *dstmAccept(void *acceptfd) {
 	unsigned int *oidarry, numoid, mid, threadid;
     int n, v;
 
-#ifdef DEBUG
-	printf("%s-> Entering dstmAccept\n", __func__);	fflush(stdout);
-#endif
 	/* Receive control messages from other machines */
 	while(1) {
 		int ret=recv_data_errorcode((int)acceptfd, &control, sizeof(char));
@@ -320,9 +320,6 @@ void *dstmAccept(void *acceptfd) {
 	//		exit(0);
 			break;
 		}
-#ifdef DEBUG
-		printf("%s-> dstmAccept control = %d\n", __func__, (int)control);
-#endif
 		switch(control) {
 			case READ_REQUEST:
 #ifdef DEBUG
@@ -566,6 +563,11 @@ void *dstmAccept(void *acceptfd) {
         printf("RESTART!!!\n");
         okCommit = TRANS_OK;
         pthread_mutex_unlock(&liveHosts_mutex);
+
+        pthread_mutex_lock(&recovery_mutex);
+        leader_index = -1;
+        pthread_mutex_unlock(&recovery_mutex);
+
         break;
 			case UPDATE_LIVE_HOSTS:
 #ifdef DEBUG
@@ -1954,7 +1956,6 @@ char inspectTransaction(char finalResponse,unsigned int transid,char* debug,int
     // if decision is not lost and okCommit is not TRANS_FLAG, get out of this loop
     while(!((tNode->decision != DECISION_LOST) && (okCommit != TRANS_FLAG))) { 
 //      printf("%s -> transID : %u decision : %d is waiting flag : %d\n",debug,tNode->transid,tNode->decision,TRANS_FLAG);
-//      sleep(3);
       randomdelay();
     }
 
diff --git a/Robust/src/Runtime/DSTM/interface_recovery/trans.c b/Robust/src/Runtime/DSTM/interface_recovery/trans.c
index 731fab8b..63431d08 100644
--- a/Robust/src/Runtime/DSTM/interface_recovery/trans.c
+++ b/Robust/src/Runtime/DSTM/interface_recovery/trans.c
@@ -160,10 +160,6 @@ GDBSEND1:
     else if( numbytes < 0) {    
       // Receive returned an error.
       // Analyze underlying cause
-#ifdef DEBUG
-      printf("%s -> fd : %d errno = %d %s\n",__func__, fd, errno,strerror(errno));
-      fflush(stdout);
-#endif
       if(errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) {
         // machine has failed
         //
@@ -171,9 +167,6 @@ GDBSEND1:
         // when we start send and finish send see if it is longer
         // than our threshold
         //
-#ifdef DEBUG
-        printf("%s -> EAGAIN : %s\n",__func__,(errno == EAGAIN)?"TRUE":"FALSE");
-#endif
         return -1;
       } else {
 #ifdef GDBDEBUG
@@ -181,9 +174,6 @@ GDBSEND1:
           goto GDBSEND1;    
 #endif
 
-#ifdef DEBUG
-        printf("%s -> Unexpected ERROR!\n",__func__);
-#endif
         return -2;
       }
     }
@@ -202,9 +192,6 @@ GDBSEND1:
     }
 #endif
   } // close while loop
-#ifdef DEBUG
-  printf("%s-> Exiting\n", __func__);
-#endif
   return 0; // completed sending data
 }
 
@@ -348,17 +335,11 @@ void recv_data_buf(int fd, struct readstruct * readbuffer, void *buffer, int buf
 }
 
 int recv_data_errorcode(int fd, void *buf, int buflen) {
-#ifdef DEBUG
-  printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
-#endif
   char *buffer = (char *)(buf);
   int size = buflen;
   int numbytes;
   while (size > 0) {
     numbytes = recv(fd, buffer, size, 0);
-#ifdef DEBUG
-    printf("%s-> numbytes: %d\n", __func__, numbytes);
-#endif
     if (numbytes==0)
       return 0;
     else if (numbytes == -1) {
@@ -370,9 +351,6 @@ int recv_data_errorcode(int fd, void *buf, int buflen) {
     buffer += numbytes;
     size -= numbytes;
   }
-#ifdef DEBUG
-  printf("%s-> Exiting\n", __func__);
-#endif
   return 1;
 }
 
@@ -1832,12 +1810,12 @@ void restoreDuplicationState(unsigned int deadHost,unsigned int epoch_num)
   printf("%s -> Entering\n",__func__);
   int* sdlist;
   tlist_t* tList;
+  int flag = 0;
 
 #ifdef RECOVERYSTATS
   printf("Recovery Start\n");
   long long st;
   long long fi;
-  int flag = 0;
   unsigned int dupeSize = 0;  // to calculate the size of backed up data
 
   st = myrdtsc(); // to get clock
@@ -3832,8 +3810,8 @@ void reqClearNotifyList(unsigned int oid)
       return;
   }
   else {
-    printf("%s -> Pmid = %s\n",__func__,midtoIPString(pmid));
-    printf("%s -> Bmid = %s\n",__func__,midtoIPString(bmid));
+//    printf("%s -> Pmid = %s\n",__func__,midtoIPString(pmid));
+//    printf("%s -> Bmid = %s\n",__func__,midtoIPString(bmid));
     
     msg[0] = CLEAR_NOTIFY_LIST;
     *((unsigned int *)(&msg[1])) = oid;
@@ -3861,12 +3839,10 @@ void printRecoveryStat() {
   int i;
   for(i=0; i < numRecovery;i++) {
     printf("Dead Machine = %s\n",midtoIPString(recoverStat[i].deadMachine));
-    printf("Recoveryed data(byte) = %u\n",recoverStat[i].recoveredData);
     printf("Recovery Time(ms) = %ld\n",recoverStat[i].elapsedTime);
   }
   printf("**************************\n\n");
   fflush(stdout);
-  fflush(stdout);
 #else
   printf("No stat\n");
 #endif