/* Source: IRC.git, Robust/src/Runtime/DSTM/interface/trans.c
 * (revision cf7f77e707c8a5ebd33f2419854303b8e7e64349) */
#include "dstm.h"
#include "ip.h"
#include "clookup.h"
#include "machinepile.h"
#include "mlookup.h"
#include "llookup.h"
#include "plookup.h"
#include "prelookup.h"
#include "queue.h"
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <string.h>
#include <pthread.h>
21
/* TCP port that transRequest() connects to on each participant machine */
#define LISTEN_PORT 2156
/* Size in bytes of the local receive buffers declared in this file */
#define RECEIVE_BUFFER_SIZE 2048
/* Number of worker threads created by transInit() (size of wthreads[]) */
#define NUM_THREADS 10
/* Size passed to objstrCreate() for the global prefetch cache */
#define PREFETCH_CACHE_SIZE 1048576 //1MB
/* NOTE(review): presumably the file read by processConfigFile() -- confirm */
#define CONFIG_FILENAME "dstm.conf"

/* Global Variables */
extern int classsize[];
extern primarypfq_t pqueue; //Shared prefetch queue
extern mcpileq_t mcqueue;  //Shared queue containing prefetch requests sorted by remote machineids 
objstr_t *prefetchcache; //Global Prefetch cache
pthread_mutex_t prefetchcache_mutex;// Mutex to lock Prefetch Cache
pthread_mutexattr_t prefetchcache_mutex_attr; /* Attribute for lock to make it a recursive lock */
extern pthread_mutex_t mainobjstore_mutex;// Mutex to lock main Object store
extern prehashtable_t pflookup; //Global Prefetch cache's lookup table
pthread_t wthreads[NUM_THREADS]; //Worker threads for working on the prefetch queue
pthread_t tPrefetch;            /* Primary Prefetch thread that processes the prefetch queue */
extern objstr_t *mainobjstore;
/* Id of this machine; a pile whose mid equals it is handled locally in transCommit() */
unsigned int myIpAddr;
/* NOTE(review): the host-table and oid-range variables below are not written
 * anywhere in the visible part of this file; presumably they are filled in
 * while the configuration is processed -- confirm against processConfigFile() */
unsigned int *hostIpAddrs;
int sizeOfHostArray;
int numHostsInSystem;
int myIndexInHostArray;
unsigned int oidsPerBlock;
unsigned int oidMin;
unsigned int oidMax;

/* Forward declaration: createPiles() is defined later in this file */
plistnode_t *createPiles(transrecord_t *);
/* Returns the number of elements that precede the -1 terminator in `array`.
 *
 * array: int array terminated by an element equal to -1; the terminator is
 *        not counted. A NULL array is treated as empty and yields 0.
 *
 * The function is deliberately a plain external function: a non-static
 * `inline` definition without an `extern` declaration provides no external
 * definition under C99/C11 rules, which makes non-inlined calls fail to link. */
int arrayLength(int *array) {
	int count = 0;

	if (array == NULL)
		return 0;
	while (array[count] != -1)
		count++;
	return count;
}
/* Returns the largest value among the first `arraylength` elements of `array`.
 *
 * array:       must point to at least one readable element; array[0] is read
 *              unconditionally as the initial maximum.
 * arraylength: number of elements to examine. When it is <= 1 the result is
 *              simply array[0].
 *
 * Defined as a plain external function: a non-static `inline` definition
 * without an `extern` declaration provides no external definition under
 * C99/C11 rules, which makes non-inlined calls fail to link. */
int findmax(int *array, int arraylength) {
	int max = array[0];
	int i;

	/* Element 0 already seeds `max`, so the scan starts at index 1. */
	for (i = 1; i < arraylength; i++) {
		if (array[i] > max)
			max = array[i];
	}
	return max;
}
66 /* This function is a prefetch call generated by the compiler that
67  * populates the shared primary prefetch queue*/
68 void prefetch(int ntuples, unsigned int *oids, unsigned short *endoffsets, short *arrayfields) {
69         int qnodesize;
70         int len = 0;
71
72         /* Allocate for the queue node*/
73         char *node;
74         qnodesize = sizeof(prefetchqelem_t) + sizeof(int) + ntuples * (sizeof(short) + sizeof(unsigned int)) + endoffsets[ntuples - 1] * sizeof(short); 
75         if((node = calloc(1, qnodesize)) == NULL) {
76                 printf("Calloc Error %s, %d\n", __FILE__, __LINE__);
77                 return;
78         }
79         /* Set queue node values */
80         len = sizeof(prefetchqelem_t);
81         memcpy(node + len, &ntuples, sizeof(int));
82         len += sizeof(int);
83         memcpy(node + len, oids, ntuples*sizeof(unsigned int));
84         len += ntuples * sizeof(unsigned int);
85         memcpy(node + len, endoffsets, ntuples*sizeof(short));
86         len += ntuples * sizeof(short);
87         memcpy(node + len, arrayfields, endoffsets[ntuples-1]*sizeof(short));
88         /* Lock and insert into primary prefetch queue */
89         pthread_mutex_lock(&pqueue.qlock);
90         pre_enqueue((prefetchqelem_t *)node);
91         pthread_cond_signal(&pqueue.qcond);
92         pthread_mutex_unlock(&pqueue.qlock);
93 }
94
95 /* This function starts up the transaction runtime. */
96 int dstmStartup(const char * option) {
97   pthread_t thread_Listen;
98   pthread_attr_t attr;
99   int master=option!=NULL && strcmp(option, "master")==0;
100
101         if (processConfigFile() != 0)
102                 return 0; //TODO: return error value, cause main program to exit
103
104   dstmInit();
105   transInit();
106
107   if (master) {
108     pthread_attr_init(&attr);
109     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
110     pthread_create(&thread_Listen, &attr, dstmListen, NULL);
111     return 1;
112   } else {
113     dstmListen();
114     return 0;
115   }
116
117 }
118
119
120 /* This function initiates the prefetch thread
121  * A queue is shared between the main thread of execution
122  * and the prefetch thread to process the prefetch call
123  * Call from compiler populates the shared queue with prefetch requests while prefetch thread
124  * processes the prefetch requests */
125 void transInit() {
126         int t, rc;
127         int retval;
128         //Create and initialize prefetch cache structure
129         prefetchcache = objstrCreate(PREFETCH_CACHE_SIZE);
130
131         /* Initialize attributes for mutex */
132         pthread_mutexattr_init(&prefetchcache_mutex_attr);
133         pthread_mutexattr_settype(&prefetchcache_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
134         
135         pthread_mutex_init(&prefetchcache_mutex, &prefetchcache_mutex_attr);
136
137         //Create prefetch cache lookup table
138         if(prehashCreate(HASH_SIZE, LOADFACTOR))
139                 return; //Failure
140         //Initialize primary shared queue
141         queueInit();
142         //Initialize machine pile w/prefetch oids and offsets shared queue
143         mcpileqInit();
144         //Create the primary prefetch thread 
145         
146         do {
147           retval=pthread_create(&tPrefetch, NULL, transPrefetch, NULL);
148         } while(retval!=0);
149         pthread_detach(tPrefetch);
150
151         //Create and Initialize a pool of threads 
152         /* Threads are active for the entire period runtime is running */
153         for(t = 0; t< NUM_THREADS; t++) {
154           do {
155                 rc = pthread_create(&wthreads[t], NULL, mcqProcess, (void *)t);
156           } while(rc!=0);
157           pthread_detach(wthreads[t]);
158         }
159 }
160
161 /* This function stops the threads spawned */
162 void transExit() {
163         int t;
164         pthread_cancel(tPrefetch);
165         for(t = 0; t < NUM_THREADS; t++)
166                 pthread_cancel(wthreads[t]);
167
168         return;
169 }
170
/* Sleeps for a pseudo-random interval of roughly 1 to 11 msec.
 * Mostly used between retries of a transaction commit.
 *
 * The jitter is taken from the sub-second part of the wall clock. The
 * previous implementation derived it from time(NULL), i.e. whole seconds,
 * which changed the delay by only one nanosecond per second and therefore
 * produced an essentially constant delay. */
void randomdelay(void)
{
	struct timeval now = {0, 0};
	struct timespec req;

	gettimeofday(&now, NULL);
	req.tv_sec = 0;
	/* 1 msec base + up to ~10 msec of jitter, in 1 usec steps */
	req.tv_nsec = 1000000L + ((long)now.tv_usec * 1000L) % 10000000L;
	nanosleep(&req, NULL);
}
185
186 /* This function initializes things required in the transaction start*/
187 transrecord_t *transStart()
188 {
189         printf("Starting transaction\n");
190         transrecord_t *tmp = malloc(sizeof(transrecord_t));
191         tmp->cache = objstrCreate(1048576);
192         tmp->lookupTable = chashCreate(HASH_SIZE, LOADFACTOR);
193 #ifdef COMPILER
194         tmp->revertlist=NULL;
195 #endif
196         return tmp;
197 }
198
199 /* This function finds the location of the objects involved in a transaction
200  * and returns the pointer to the object if found in a remote location */
201 objheader_t *transRead(transrecord_t *record, unsigned int oid) {       
202         unsigned int machinenumber;
203         objheader_t *tmp, *objheader;
204         objheader_t *objcopy;
205         int size, rc, found = 0;
206         void *buf;
207         struct timespec ts;
208         struct timeval tp;
209         
210         rc = gettimeofday(&tp, NULL);
211
212         /* Convert from timeval to timespec */
213         ts.tv_nsec = tp.tv_usec * 1000;
214
215         /* Search local transaction cache */
216         if((objheader = (objheader_t *)chashSearch(record->lookupTable, oid)) != NULL){
217 #ifdef COMPILER
218           return &objheader[1];
219 #else
220           return objheader;
221 #endif
222         } else if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
223                 /* Look up in machine lookup table  and copy  into cache*/
224                 GETSIZE(size, objheader);
225                 size += sizeof(objheader_t);
226                 //TODO:Lock the local trans cache while copying the object here
227                 objcopy = objstrAlloc(record->cache, size);
228                 memcpy(objcopy, (void *)objheader, size);
229                 /* Insert into cache's lookup table */
230                 chashInsert(record->lookupTable, OID(objheader), objcopy); 
231 #ifdef COMPILER
232                 return &objcopy[1];
233 #else
234                 return objcopy;
235 #endif
236         } else if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) { /* Look up in prefetch cache */
237                 found = 1;
238                 GETSIZE(size, tmp);
239                 size+=sizeof(objheader_t);
240                 //TODO:Lock the local  trans cache while copying the object here
241                 objcopy = objstrAlloc(record->cache, size);
242                 memcpy(objcopy, (void *)tmp, size);
243                 /* Insert into cache's lookup table */
244                 chashInsert(record->lookupTable, OID(tmp), objcopy); 
245 #ifdef COMPILER
246                 return &objcopy[1];
247 #else
248                 return objcopy;
249 #endif
250         } else {
251                 /*If object not found in prefetch cache then block until object appears in the prefetch cache */
252                 pthread_mutex_lock(&pflookup.lock);
253                 while(!found) {
254                         rc = pthread_cond_timedwait(&pflookup.cond, &pflookup.lock, &ts);
255                         if(rc == ETIMEDOUT) {
256                                 printf("Wait timed out\n");
257                                 /* Check Prefetch cache again */
258                                 if((tmp =(objheader_t *) prehashSearch(oid)) != NULL) {
259                                         found = 1;
260                                         GETSIZE(size,tmp);
261                                         size+=sizeof(objheader_t);
262                                         objcopy = objstrAlloc(record->cache, size);
263                                         memcpy(objcopy, (void *)tmp, size);
264                                         chashInsert(record->lookupTable, OID(tmp), objcopy); 
265                                         pthread_mutex_unlock(&pflookup.lock);
266 #ifdef COMPILER
267                                         return &objcopy[1];
268 #else
269                                         return objcopy;
270 #endif
271                                 } else {
272                                         pthread_mutex_unlock(&pflookup.lock);
273                                         break;
274                                 }
275                         }
276                 }
277
278                 /* Get the object from the remote location */
279                 machinenumber = lhashSearch(oid);
280                 objcopy = getRemoteObj(record, machinenumber, oid);
281                 if(objcopy == NULL) {
282                         printf("Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
283                         return NULL;
284                 } else {
285 #ifdef COMPILER
286                   return &objcopy[1];
287 #else
288                   return objcopy;
289 #endif
290                 }
291         }
292 }
293
294 /* This function creates objects in the transaction record */
295 objheader_t *transCreateObj(transrecord_t *record, unsigned int size)
296 {
297   objheader_t *tmp = (objheader_t *) objstrAlloc(record->cache, (sizeof(objheader_t) + size));
298   OID(tmp) = getNewOID();
299   tmp->version = 1;
300   tmp->rcount = 1;
301   STATUS(tmp) = NEW;
302   chashInsert(record->lookupTable, OID(tmp), tmp);
303 #ifdef COMPILER
304   return &tmp[1]; //want space after object header
305 #else
306   return tmp;
307 #endif
308 }
309
310 /* This function creates machine piles based on all machines involved in a
311  * transaction commit request */
312 plistnode_t *createPiles(transrecord_t *record) {
313         int i = 0;
314         unsigned int size;/* Represents number of bins in the chash table */
315         chashlistnode_t *curr, *ptr, *next;
316         plistnode_t *pile = NULL;
317         unsigned int machinenum;
318         void *localmachinenum;
319         objheader_t *headeraddr;
320         
321         ptr = record->lookupTable->table;
322         size = record->lookupTable->size;
323
324         for(i = 0; i < size ; i++) {
325                 curr = &ptr[i];
326                 /* Inner loop to traverse the linked list of the cache lookupTable */
327                 while(curr != NULL) {
328                         //if the first bin in hash table is empty
329                         if(curr->key == 0) {
330                                 break;
331                         }
332                         next = curr->next;
333
334                         if ((headeraddr = chashSearch(record->lookupTable, curr->key)) == NULL) {
335                                 printf("Error: No such oid %s, %d\n", __FILE__, __LINE__);
336                                 return NULL;
337                         }
338
339                         //Get machine location for object id (and whether local or not)
340                         if (STATUS(headeraddr) & NEW || mhashSearch(curr->key) != NULL) {
341                                 machinenum = myIpAddr;
342                         } else  if ((machinenum = lhashSearch(curr->key)) == 0) {
343                                 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
344                                 return NULL;
345                         }
346
347                         //Make machine groups
348                         if ((pile = pInsert(pile, headeraddr, machinenum, record->lookupTable->numelements)) == NULL) {
349                                 printf("pInsert error %s, %d\n", __FILE__, __LINE__);
350                                 return NULL;
351                         }
352
353                         curr = next;
354                 }
355         }
356         return pile; 
357 }
358
359 /* This function initiates the transaction commit process
360  * Spawns threads for each of the new connections with Participants 
361  * and creates new piles by calling the createPiles(), 
362  * Sends a transrequest() to each remote machines for objects found remotely 
363  * and calls handleLocalReq() to process objects found locally */
364 int transCommit(transrecord_t *record) {        
365         unsigned int tot_bytes_mod, *listmid;
366         plistnode_t *pile, *pile_ptr;
367         int i, j, rc, val;
368         int pilecount, offset, threadnum, trecvcount;
369         char buffer[RECEIVE_BUFFER_SIZE],control;
370         char transid[TID_LEN];
371         trans_req_data_t *tosend;
372         trans_commit_data_t transinfo;
373         static int newtid = 0;
374         char treplyctrl = 0, treplyretry = 0; /* keeps track of the common response that needs to be sent */
375         char localstat = 0;
376
377         do {
378                 trecvcount = 0;
379                 threadnum = 0;
380
381                 /* Look through all the objects in the transaction record and make piles 
382                  * for each machine involved in the transaction*/
383                 pile_ptr = pile = createPiles(record);
384
385                 /* Create the packet to be sent in TRANS_REQUEST */
386
387                 /* Count the number of participants */
388                 pilecount = pCount(pile);
389
390                 /* Create a list of machine ids(Participants) involved in transaction   */
391                 if((listmid = calloc(pilecount, sizeof(unsigned int))) == NULL) {
392                         printf("Calloc error %s, %d\n", __FILE__, __LINE__);
393                         free(record);
394                         return 1;
395                 }               
396                 pListMid(pile, listmid);
397
398
399                 /* Initialize thread variables,
400                  * Spawn a thread for each Participant involved in a transaction */
401                 pthread_t thread[pilecount];
402                 pthread_attr_t attr;                    
403                 pthread_cond_t tcond;
404                 pthread_mutex_t tlock;
405                 pthread_mutex_t tlshrd;
406
407                 thread_data_array_t *thread_data_array;
408                 if((thread_data_array = (thread_data_array_t *) malloc(sizeof(thread_data_array_t)*pilecount)) == NULL) {
409                         printf("Malloc error %s, %d\n", __FILE__, __LINE__);
410                         pthread_cond_destroy(&tcond);
411                         pthread_mutex_destroy(&tlock);
412                         pDelete(pile_ptr);
413                         free(listmid);
414                         free(record);
415                         return 1;
416                 }
417
418                 local_thread_data_array_t *ltdata;
419                 if((ltdata = calloc(1, sizeof(local_thread_data_array_t))) == NULL) {
420                         printf("Calloc error %s, %d\n", __FILE__, __LINE__);
421                         pthread_cond_destroy(&tcond);
422                         pthread_mutex_destroy(&tlock);
423                         pDelete(pile_ptr);
424                         free(listmid);
425                         free(thread_data_array);
426                         free(record);
427                         return 1;
428                 }
429
430                 thread_response_t rcvd_control_msg[pilecount];  /* Shared thread array that keeps track of responses of participants */
431
432                 /* Initialize and set thread detach attribute */
433                 pthread_attr_init(&attr);
434                 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
435                 pthread_mutex_init(&tlock, NULL);
436                 pthread_cond_init(&tcond, NULL);
437
438                 /* Process each machine pile */
439                 while(pile != NULL) {
440                         //Create transaction id
441                         newtid++;
442                         if ((tosend = calloc(1, sizeof(trans_req_data_t))) == NULL) {
443                                 printf("Calloc error %s, %d\n", __FILE__, __LINE__);
444                                 pthread_cond_destroy(&tcond);
445                                 pthread_mutex_destroy(&tlock);
446                                 pDelete(pile_ptr);
447                                 free(listmid);
448                                 free(thread_data_array);
449                                 free(ltdata);
450                                 free(record);
451                                 return 1;
452                         }
453                         tosend->f.control = TRANS_REQUEST;
454                         sprintf(tosend->f.trans_id, "%x_%d", pile->mid, newtid);
455                         tosend->f.mcount = pilecount;
456                         tosend->f.numread = pile->numread;
457                         tosend->f.nummod = pile->nummod;
458                         tosend->f.numcreated = pile->numcreated;
459                         tosend->f.sum_bytes = pile->sum_bytes;
460                         tosend->listmid = listmid;
461                         tosend->objread = pile->objread;
462                         tosend->oidmod = pile->oidmod;
463                         tosend->oidcreated = pile->oidcreated;
464                         thread_data_array[threadnum].thread_id = threadnum;
465                         thread_data_array[threadnum].mid = pile->mid;
466                         thread_data_array[threadnum].buffer = tosend;
467                         thread_data_array[threadnum].recvmsg = rcvd_control_msg;
468                         thread_data_array[threadnum].threshold = &tcond;
469                         thread_data_array[threadnum].lock = &tlock;
470                         thread_data_array[threadnum].count = &trecvcount;
471                         thread_data_array[threadnum].replyctrl = &treplyctrl;
472                         thread_data_array[threadnum].replyretry = &treplyretry;
473                         thread_data_array[threadnum].rec = record;
474                         /* If local do not create any extra connection */
475                         if(pile->mid != myIpAddr) { /* Not local */
476                           do {
477                                 rc = pthread_create(&thread[threadnum], &attr, transRequest, (void *) &thread_data_array[threadnum]);  
478                           } while(rc!=0);
479                                 if(rc) {
480                                         perror("Error in pthread create\n");
481                                         pthread_cond_destroy(&tcond);
482                                         pthread_mutex_destroy(&tlock);
483                                         pDelete(pile_ptr);
484                                         free(listmid);
485                                         for (i = 0; i < threadnum; i++)
486                                                 free(thread_data_array[i].buffer);
487                                         free(thread_data_array);
488                                         free(ltdata);
489                                         free(record);
490                                         return 1;
491                                 }
492                         } else { /*Local*/
493                                 ltdata->tdata = &thread_data_array[threadnum];
494                                 ltdata->transinfo = &transinfo;
495                                 do {
496                                 val = pthread_create(&thread[threadnum], &attr, handleLocalReq, (void *) ltdata);
497                                 } while(val!=0);
498                                 if(val) {
499                                         perror("Error in pthread create\n");
500                                         pthread_cond_destroy(&tcond);
501                                         pthread_mutex_destroy(&tlock);
502                                         pDelete(pile_ptr);
503                                         free(listmid);
504                                         for (i = 0; i < threadnum; i++)
505                                                 free(thread_data_array[i].buffer);
506                                         free(thread_data_array);
507                                         free(ltdata);
508                                         free(record);
509                                         return 1;
510                                 }
511                         }
512
513                         threadnum++;            
514                         pile = pile->next;
515                 }
516
517                 /* Free attribute and wait for the other threads */
518                 pthread_attr_destroy(&attr);
519                 
520                 for (i = 0; i < pilecount; i++) {
521                         rc = pthread_join(thread[i], NULL);
522                         if(rc)
523                         {
524                                 printf("ERROR return code from pthread_join() is %d\n", rc);
525                                 pthread_cond_destroy(&tcond);
526                                 pthread_mutex_destroy(&tlock);
527                                 pDelete(pile_ptr);
528                                 free(listmid);
529                                 for (j = i; j < pilecount; j++)
530                                         free(thread_data_array[j].buffer);
531                                 free(thread_data_array);
532                                 free(ltdata);
533                                 free(record);
534                                 return 1;
535                         }
536                         free(thread_data_array[i].buffer);
537                 }
538         
539
540                 /* Free resources */    
541                 pthread_cond_destroy(&tcond);
542                 pthread_mutex_destroy(&tlock);
543                 free(listmid);
544                 pDelete(pile_ptr);
545                 free(thread_data_array);
546                 free(ltdata);
547
548                 /* wait a random amount of time */
549                 if (treplyretry == 1) {
550                         randomdelay();
551                 }
552
553         /* Retry trans commit procedure if not sucessful in the first try */
554         } while (treplyretry == 1);
555         
556         /* Free Resources */
557         objstrDelete(record->cache);
558         chashDelete(record->lookupTable);
559         free(record);
560         return 0;
561 }
562
563 /* This function sends information involved in the transaction request 
564  * to participants and accepts a response from particpants.
565  * It calls decideresponse() to decide on what control message 
566  * to send next to participants and sends the message using sendResponse()*/
567 void *transRequest(void *threadarg) {
568         int sd, i, n;
569         struct sockaddr_in serv_addr;
570         struct hostent *server;
571         thread_data_array_t *tdata;
572         objheader_t *headeraddr;
573         char buffer[RECEIVE_BUFFER_SIZE], control, recvcontrol;
574         char machineip[16], retval;
575
576         tdata = (thread_data_array_t *) threadarg;
577
578         /* Send Trans Request */
579         if ((sd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
580                 perror("Error in socket for TRANS_REQUEST\n");
581                 pthread_exit(NULL);
582         }
583         bzero((char*) &serv_addr, sizeof(serv_addr));
584         serv_addr.sin_family = AF_INET;
585         serv_addr.sin_port = htons(LISTEN_PORT);
586         midtoIP(tdata->mid,machineip);
587         machineip[15] = '\0';
588         serv_addr.sin_addr.s_addr = inet_addr(machineip);
589         /* Open Connection */
590         if (connect(sd, (struct sockaddr *) &serv_addr, sizeof(struct sockaddr)) < 0) {
591                 perror("Error in connect for TRANS_REQUEST\n");
592                 close(sd);
593                 pthread_exit(NULL);
594         }
595
596         /* Send bytes of data with TRANS_REQUEST control message */
597         if (send(sd, &(tdata->buffer->f), sizeof(fixed_data_t),MSG_NOSIGNAL) < sizeof(fixed_data_t)) {
598                 perror("Error sending fixed bytes for thread\n");
599                 close(sd);
600                 pthread_exit(NULL);
601         }
602         /* Send list of machines involved in the transaction */
603         {
604                 int size=sizeof(unsigned int)*tdata->buffer->f.mcount;
605                 if (send(sd, tdata->buffer->listmid, size, MSG_NOSIGNAL) < size) {
606                         perror("Error sending list of machines for thread\n");
607                         close(sd);
608                         pthread_exit(NULL);
609                 }
610         }
611         /* Send oids and version number tuples for objects that are read */
612         {
613                 int size=(sizeof(unsigned int)+sizeof(short))*tdata->buffer->f.numread;
614                 if (send(sd, tdata->buffer->objread, size, MSG_NOSIGNAL) < size) {
615                         perror("Error sending tuples for thread\n");
616                         close(sd);
617                         pthread_exit(NULL);
618                 }
619         }
620         /* Send objects that are modified */
621         for(i = 0; i < tdata->buffer->f.nummod ; i++) {
622                 int size;
623                 headeraddr = chashSearch(tdata->rec->lookupTable, tdata->buffer->oidmod[i]);
624                 GETSIZE(size,headeraddr);
625                 size+=sizeof(objheader_t);
626                 if (send(sd, headeraddr, size, MSG_NOSIGNAL)  < size) {
627                         perror("Error sending obj modified for thread\n");
628                         close(sd);
629                         pthread_exit(NULL);
630                 }
631         }
632
633         /* Read control message from Participant */
634         if((n = read(sd, &control, sizeof(char))) <= 0) {
635                 perror("Error in reading control message from Participant\n");
636                 close(sd);
637                 pthread_exit(NULL);
638         }
639         recvcontrol = control;
640
641         /* Update common data structure and increment count */
642         tdata->recvmsg[tdata->thread_id].rcv_status = recvcontrol;
643
644         /* Lock and update count */
645         /* Thread sleeps until all messages from pariticipants are received by coordinator */
646         pthread_mutex_lock(tdata->lock);
647
648         (*(tdata->count))++; /* keeps track of no of messages received by the coordinator */
649
650         /* Wake up the threads and invoke decideResponse (once) */
651         if(*(tdata->count) == tdata->buffer->f.mcount) {
652                 decideResponse(tdata); 
653                 pthread_cond_broadcast(tdata->threshold);
654         } else {
655                 pthread_cond_wait(tdata->threshold, tdata->lock);
656         }
657         pthread_mutex_unlock(tdata->lock);
658
659         /* Send the final response such as TRANS_COMMIT or TRANS_ABORT t
660          * to all participants in their respective socket */
661         if (sendResponse(tdata, sd) == 0) { 
662                 printf("sendResponse returned error %s,%d\n", __FILE__, __LINE__);
663                 close(sd);
664                 pthread_exit(NULL);
665         }
666
667         /* Close connection */
668         close(sd);
669         pthread_exit(NULL);
670 }
671
672 /* This function decides the reponse that needs to be sent to 
673  * all Participant machines after the TRANS_REQUEST protocol */
674 void decideResponse(thread_data_array_t *tdata) {
675         char control;
676         int i, transagree = 0, transdisagree = 0, transsoftabort = 0; /* Counters to formulate decision of what
677                                                                          message to send */
678
679         for (i = 0 ; i < tdata->buffer->f.mcount; i++) {
680                 control = tdata->recvmsg[i].rcv_status; /* tdata: keeps track of all participant responses
681                                                            written onto the shared array */
682                 switch(control) {
683                         default:
684                                 printf("Participant sent unknown message in %s, %d\n", __FILE__, __LINE__);
685                                 /* treat as disagree, pass thru */
686                         case TRANS_DISAGREE:
687                                 transdisagree++;
688                                 break;
689
690                         case TRANS_AGREE:
691                                 transagree++;
692                                 break;
693
694                         case TRANS_SOFT_ABORT:
695                                 transsoftabort++;
696                                 break;
697                 }
698         }
699
700         if(transdisagree > 0) {
701                 /* Send Abort */
702                 *(tdata->replyctrl) = TRANS_ABORT;
703                 *(tdata->replyretry) = 0;
704         } else if(transagree == tdata->buffer->f.mcount){
705                 /* Send Commit */
706                 *(tdata->replyctrl) = TRANS_COMMIT;
707                 *(tdata->replyretry) = 0;
708         } else { 
709                 /* Send Abort in soft abort case followed by retry commiting transaction again*/
710                 *(tdata->replyctrl) = TRANS_ABORT;
711                 *(tdata->replyretry) = 1;
712         }
713
714         return;
715 }
716 /* This function sends the final response to remote machines per thread in their respective socket id 
717  * It returns a char that is only needed to check the correctness of execution of this function inside
718  * transRequest()*/
719 char sendResponse(thread_data_array_t *tdata, int sd) {
720         int n, N, sum, oidcount = 0;
721         char *ptr, retval = 0;
722         unsigned int *oidnotfound;
723
724         /* If the decided response is due to a soft abort and missing objects at the Participant's side */
725         if(tdata->recvmsg[tdata->thread_id].rcv_status == TRANS_SOFT_ABORT) {
726                 /* Read list of objects missing */
727                 if((read(sd, &oidcount, sizeof(int)) != 0) && (oidcount != 0)) {
728                         N = oidcount * sizeof(unsigned int);
729                         if((oidnotfound = calloc(oidcount, sizeof(unsigned int))) == NULL) {
730                                 printf("Calloc error %s, %d\n", __FILE__, __LINE__);
731                                 return 0;
732                         }
733                         ptr = (char *) oidnotfound;
734                         do {
735                                 n = read(sd, ptr+sum, N-sum);
736                                 sum += n;
737                         } while(sum < N && n !=0);
738                 }
739                 retval =  TRANS_SOFT_ABORT;
740         }
741         /* If the decided response is TRANS_ABORT */
742         if(*(tdata->replyctrl) == TRANS_ABORT) {
743                 retval = TRANS_ABORT;
744         } else if(*(tdata->replyctrl) == TRANS_COMMIT) { /* If the decided response is TRANS_COMMIT */
745                 retval = TRANS_COMMIT;
746         }
747
748         if (send(sd, tdata->replyctrl, sizeof(char),MSG_NOSIGNAL) < sizeof(char)) {
749                 perror("Error sending ctrl message for participant\n");
750         }
751
752         return retval;
753 }
754
755 /* This function opens a connection, places an object read request to the 
756  * remote machine, reads the control message and object if available  and 
757  * copies the object and its header to the local cache.
758  * TODO replace mnum and midtoIP() with MACHINE_IP address later */ 
759
760 void *getRemoteObj(transrecord_t *record, unsigned int mnum, unsigned int oid) {
761         int sd, size, val;
762         struct sockaddr_in serv_addr;
763         struct hostent *server;
764         char control;
765         char machineip[16];
766         objheader_t *h;
767         void *objcopy;
768
769         if ((sd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
770                 perror("Error in socket\n");
771                 return NULL;
772         }
773         bzero((char*) &serv_addr, sizeof(serv_addr));
774         serv_addr.sin_family = AF_INET;
775         serv_addr.sin_port = htons(LISTEN_PORT);
776         //serv_addr.sin_addr.s_addr = inet_addr(MACHINE_IP);
777         midtoIP(mnum,machineip);
778         machineip[15] = '\0';
779         serv_addr.sin_addr.s_addr = inet_addr(machineip);
780         /* Open connection */
781         if (connect(sd, (struct sockaddr *) &serv_addr, sizeof(struct sockaddr)) < 0) {
782                 perror("Error in connect\n");
783                 return NULL;
784         }
785         char readrequest[sizeof(char)+sizeof(unsigned int)];
786         readrequest[0] = READ_REQUEST;
787         *((unsigned int *)(&readrequest[1])) = oid;
788         if (send(sd, &readrequest, sizeof(readrequest), MSG_NOSIGNAL) < sizeof(readrequest)) {
789                 perror("Error sending message\n");
790                 return NULL;
791         }
792
793 #ifdef DEBUG1
794         printf("DEBUG -> ready to rcv ...\n");
795 #endif
796         /* Read response from the Participant */
797         if((val = read(sd, &control, sizeof(char))) <= 0) {
798                 perror("No control response for getRemoteObj sent\n");
799                 return NULL;
800         }
801         switch(control) {
802                 case OBJECT_NOT_FOUND:
803                         return NULL;
804                 case OBJECT_FOUND:
805                         /* Read object if found into local cache */
806                         if((val = read(sd, &size, sizeof(int))) <= 0) {
807                                 perror("No size is read from the participant\n");
808                                 return NULL;
809                         }
810                         objcopy = objstrAlloc(record->cache, size);
811                         if((val = read(sd, objcopy, size)) <= 0) {
812                                 perror("No objects are read from the remote participant\n");
813                                 return NULL;
814                         }
815                         /* Insert into cache's lookup table */
816                         chashInsert(record->lookupTable, oid, objcopy); 
817                         break;
818                 default:
819                         printf("Error in recv request from participant on a READ_REQUEST %s, %d\n",__FILE__, __LINE__);
820                         return NULL;
821         }
822         /* Close connection */
823         close(sd);
824         return objcopy;
825 }
826
827 /* This function handles the local objects involved in a transaction commiting process.
828  * It also makes a decision if this local machine sends AGREE or DISAGREE or SOFT_ABORT to coordinator.
829  * Note Coordinator = local machine
830  * It wakes up the other threads from remote participants that are waiting for the coordinator's decision and
831  * based on common agreement it either commits or aborts the transaction.
832  * It also frees the memory resources */
833 void *handleLocalReq(void *threadarg) {
834         int val, i = 0, size, offset = 0;
835         short version;
836         char control = 0, *ptr;
837         unsigned int oid;
838         unsigned int *oidnotfound = NULL, *oidlocked = NULL;
839         void *mobj, *modptr;
840         objheader_t *headptr, *headeraddr;
841         local_thread_data_array_t *localtdata;
842
843         localtdata = (local_thread_data_array_t *) threadarg;
844
845         /* Counters and arrays to formulate decision on control message to be sent */
846         oidnotfound = (unsigned int *) calloc((localtdata->tdata->buffer->f.numread + localtdata->tdata->buffer->f.nummod), sizeof(unsigned int));
847         oidlocked = (unsigned int *) calloc((localtdata->tdata->buffer->f.numread + localtdata->tdata->buffer->f.nummod), sizeof(unsigned int));
848         int objnotfound = 0, objlocked = 0; 
849         int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
850
851         /* modptr points to the beginning of the object store 
852          * created at the Pariticipant */ 
853         pthread_mutex_lock(&mainobjstore_mutex);
854         if ((modptr = objstrAlloc(mainobjstore, localtdata->tdata->buffer->f.sum_bytes)) == NULL) {
855                 printf("objstrAlloc error for modified objects %s, %d\n", __FILE__, __LINE__);
856                 pthread_mutex_unlock(&mainobjstore_mutex);
857                 pthread_exit(NULL);
858         }
859         pthread_mutex_unlock(&mainobjstore_mutex);
860         /* Write modified objects into the mainobject store */
861         for(i = 0; i< localtdata->tdata->buffer->f.nummod; i++) {
862                 headeraddr = chashSearch(localtdata->tdata->rec->lookupTable, localtdata->tdata->buffer->oidmod[i]);
863                 GETSIZE(size,headeraddr);
864                 size+=sizeof(objheader_t);
865                 memcpy((char *)modptr+offset, headeraddr, size);  
866                 offset += size;
867         }
868         /* Write new objects into the mainobject store */
869         for(i = 0; i< localtdata->tdata->buffer->f.numcreated; i++) {
870                 headeraddr = chashSearch(localtdata->tdata->rec->lookupTable, localtdata->tdata->buffer->oidcreated[i]);
871                 GETSIZE(size, headeraddr);
872                 size+=sizeof(objheader_t);
873                 memcpy((char *)modptr+offset, headeraddr, size);  
874                 offset += size;
875         }
876
877         ptr = modptr;
878         offset = 0; //Reset 
879
880         /* Process each oid in the machine pile/ group per thread */
881         for (i = 0; i < localtdata->tdata->buffer->f.numread + localtdata->tdata->buffer->f.nummod; i++) {
882                 if (i < localtdata->tdata->buffer->f.numread) {//Objs only read and not modified
883                         int incr = sizeof(unsigned int) + sizeof(short);// Offset that points to next position in the objread array
884                         incr *= i;
885                         oid = *((unsigned int *)(localtdata->tdata->buffer->objread + incr));
886                         incr += sizeof(unsigned int);
887                         version = *((short *)(localtdata->tdata->buffer->objread + incr));
888                 } else {//Objs modified
889                         int tmpsize;
890                         headptr = (objheader_t *)ptr;
891                         oid = OID(headptr);
892                         version = headptr->version;
893                         GETSIZE(tmpsize, headptr);
894                         ptr += sizeof(objheader_t) + tmpsize;
895                 }
896
897                 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
898
899                 /* Save the oids not found and number of oids not found for later use */
900                 if ((mobj = mhashSearch(oid)) == NULL) {/* Obj not found */
901                         /* Save the oids not found and number of oids not found for later use */
902                         oidnotfound[objnotfound] = oid;
903                         objnotfound++;
904                 } else { /* If Obj found in machine (i.e. has not moved) */
905                         /* Check if Obj is locked by any previous transaction */
906                         if ((STATUS(((objheader_t *)mobj)) & LOCK) == LOCK) {
907                                 if (version == ((objheader_t *)mobj)->version) {      /* If locked then match versions */ 
908                                         v_matchlock++;
909                                 } else {/* If versions don't match ...HARD ABORT */
910                                         v_nomatch++;
911                                         /* Send TRANS_DISAGREE to Coordinator */
912                                         localtdata->tdata->recvmsg[localtdata->tdata->thread_id].rcv_status = TRANS_DISAGREE;
913                                 }
914                         } else {/* If Obj is not locked then lock object */
915                                 STATUS(((objheader_t *)mobj)) |= LOCK;
916                                 //TODO Remove this for Testing
917                                 //randomdelay(); -- Why is this here.  BCD
918
919                                 /* Save all object oids that are locked on this machine during this transaction request call */
920                                 oidlocked[objlocked] = OID(((objheader_t *)mobj));
921                                 objlocked++;
922                                 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
923                                         v_matchnolock++;
924                                 } else { /* If versions don't match ...HARD ABORT */
925                                         v_nomatch++;
926                                         /* Send TRANS_DISAGREE to Coordinator */
927                                         localtdata->tdata->recvmsg[localtdata->tdata->thread_id].rcv_status = TRANS_DISAGREE;
928                                 }
929                         }
930                 }
931         }
932
933         /* Condition to send TRANS_AGREE */
934         if(v_matchnolock == localtdata->tdata->buffer->f.numread + localtdata->tdata->buffer->f.nummod) {
935                 localtdata->tdata->recvmsg[localtdata->tdata->thread_id].rcv_status = TRANS_AGREE;
936         }
937         /* Condition to send TRANS_SOFT_ABORT */
938         if((v_matchlock > 0 && v_nomatch == 0) || (objnotfound > 0 && v_nomatch == 0)) {
939                 localtdata->tdata->recvmsg[localtdata->tdata->thread_id].rcv_status = TRANS_SOFT_ABORT;
940                 //TODO  currently the only soft abort case that is supported is when object locked by previous
941                 //transaction => v_matchlock > 0 
942                 //The other case for SOFT ABORT i.e. when object is not found but versions match is not supported 
943                 /* Send number of oids not found and the missing oids if objects are missing in the machine */
944                 /* TODO Remember to store the oidnotfound for later use
945                    if(objnotfound != 0) {
946                    int size = sizeof(unsigned int)* objnotfound;
947                    }
948                    */
949         }
950
951         /* Fill out the trans_commit_data_t data structure. This is required for a trans commit process
952          * if Participant receives a TRANS_COMMIT */
953         localtdata->transinfo->objlocked = oidlocked;
954         localtdata->transinfo->objnotfound = oidnotfound;
955         localtdata->transinfo->modptr = modptr;
956         localtdata->transinfo->numlocked = objlocked;
957         localtdata->transinfo->numnotfound = objnotfound;
958
959         /* Lock and update count */
960         //Thread sleeps until all messages from pariticipants are received by coordinator
961         pthread_mutex_lock(localtdata->tdata->lock);
962         (*(localtdata->tdata->count))++; /* keeps track of no of messages received by the coordinator */
963
964         /* Wake up the threads and invoke decideResponse (once) */
965         if(*(localtdata->tdata->count) == localtdata->tdata->buffer->f.mcount) {
966                 decideResponse(localtdata->tdata); 
967                 pthread_cond_broadcast(localtdata->tdata->threshold);
968         } else {
969                 pthread_cond_wait(localtdata->tdata->threshold, localtdata->tdata->lock);
970         }
971         pthread_mutex_unlock(localtdata->tdata->lock);
972
973         /*Based on DecideResponse(), Either COMMIT or ABORT the operation*/
974         if(*(localtdata->tdata->replyctrl) == TRANS_ABORT){
975                 if(transAbortProcess(modptr,oidlocked, localtdata->transinfo->numlocked, localtdata->tdata->buffer->f.nummod) != 0) {
976                         printf("Error in transAbortProcess() %s,%d\n", __FILE__, __LINE__);
977                         pthread_exit(NULL);
978                 }
979         }else if(*(localtdata->tdata->replyctrl) == TRANS_COMMIT){
980                 if(transComProcess(modptr, localtdata->tdata->buffer->oidmod, localtdata->tdata->buffer->oidcreated, oidlocked, localtdata->tdata->buffer->f.nummod, localtdata->tdata->buffer->f.numcreated, localtdata->transinfo->numlocked) != 0) {
981                         printf("Error in transComProcess() %s,%d\n", __FILE__, __LINE__);
982                         pthread_exit(NULL);
983                 }
984         }
985
986         /* Free memory */
987         if (localtdata->transinfo->objlocked != NULL) {
988                 free(localtdata->transinfo->objlocked);
989                 localtdata->transinfo->objlocked = NULL;
990         }
991         if (localtdata->transinfo->objnotfound != NULL) {
992                 free(localtdata->transinfo->objnotfound);
993                 localtdata->transinfo->objnotfound = NULL;
994         }
995
996         pthread_exit(NULL);
997 }
998 /* This function completes the ABORT process if the transaction is aborting 
999 */
1000 int transAbortProcess(void *modptr, unsigned int *objlocked, int numlocked, int nummod) {
1001         char *ptr;
1002         int i;
1003         objheader_t *tmp_header;
1004         void *header;
1005
1006         /* Set all ref counts as 1 and do garbage collection */
1007         ptr = modptr;
1008         for(i = 0; i< nummod; i++) {
1009                 int tmpsize;
1010                 tmp_header = (objheader_t *)ptr;
1011                 tmp_header->rcount = 0;
1012                 GETSIZE(tmpsize, tmp_header);
1013                 ptr += sizeof(objheader_t) + tmpsize;
1014         }
1015         /* Unlock objects that was locked due to this transaction */
1016         for(i = 0; i< numlocked; i++) {
1017                 if((header = mhashSearch(objlocked[i])) == NULL) {
1018                         printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1019                         return 1;
1020                 }
1021                 STATUS(((objheader_t *)header)) &= ~(LOCK);
1022         }
1023
1024         /* Send ack to Coordinator */
1025
1026         /*Free the pointer */
1027         ptr = NULL;
1028         return 0;
1029 }
1030
1031 /*This function completes the COMMIT process is the transaction is commiting
1032 */
1033 int transComProcess(void *modptr, unsigned int *oidmod, unsigned int *oidcreated, unsigned int *objlocked, int nummod, int numcreated, int numlocked) {
1034         objheader_t *header;
1035         int i = 0, offset = 0;
1036         char control;
1037
1038         /* Process each modified object saved in the mainobject store */
1039         for(i = 0; i < nummod; i++) {
1040           int tmpsize;
1041                 if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
1042                         printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1043                         return 1;
1044                 }
1045                 /* Change reference count of older address and free space in objstr ?? */
1046                 header->rcount = 0;
1047
1048                 /* Change ptr address in mhash table */
1049                 mhashRemove(oidmod[i]); //TODO: this shouldn't be necessary
1050                 mhashInsert(oidmod[i], (((char *)modptr) + offset));
1051                 GETSIZE(tmpsize, header);
1052                 offset += sizeof(objheader_t) + tmpsize;
1053
1054                 /* Update object version number */
1055                 header = (objheader_t *) mhashSearch(oidmod[i]);
1056                 header->version += 1;
1057         }
1058
1059         /*If object is in prefetch cache then update it in prefetch cache */ 
1060
1061
1062         /* If object is newly created inside transaction then commit it */
1063         for (i = 0; i < numcreated; i++)
1064         {
1065                 int tmpsize;
1066                 header = (objheader_t *)(((char *)modptr) + offset);
1067                 mhashInsert(oidcreated[i], (((char *)modptr) + offset));
1068                 GETSIZE(tmpsize, header);
1069                 offset += sizeof(objheader_t) + tmpsize;
1070                 lhashInsert(oidcreated[i], myIpAddr);
1071         }
1072
1073         /* Unlock locked objects */
1074         for(i = 0; i < numlocked; i++) {
1075                 if((header = (objheader_t *) mhashSearch(objlocked[i])) == NULL) {
1076                         printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1077                         return 1;
1078                 }
1079                 STATUS(header) &= ~(LOCK);
1080         }
1081
1082         //TODO Update location lookup table
1083
1084         /* Send ack to Coordinator */
1085         printf("TRANS_SUCCESSFUL\n");
1086         return 0;
1087 }
1088
1089 /* This function checks if the prefetch oids are same and have same offsets  
1090  * for case x.a.b and y.a.b where x and y have same oid's
1091  * or if a.b.c is a subset of x.b.c.d*/ 
1092 /* check for case where the generated request a.y.z or x.y.z.g then 
1093  * prefetch needs to be generated for x.y.z.g  if oid of a and x are same*/
1094 void checkPrefetchTuples(prefetchqelem_t *node) {
1095         int i,j, count,k, sindex, index;
1096         char *ptr, *tmp;
1097         int ntuples, slength;
1098         unsigned int *oid;
1099         short *endoffsets, *arryfields; 
1100
1101         /* Check for the case x.y.z and a.b.c are same oids */ 
1102         ptr = (char *) node;
1103         ntuples = *(GET_NTUPLES(ptr));
1104         oid = GET_PTR_OID(ptr);
1105         endoffsets = GET_PTR_EOFF(ptr, ntuples); 
1106         arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1107         /* Find offset length for each tuple */
1108         int numoffset[ntuples];
1109         numoffset[0] = endoffsets[0];
1110         for(i = 1; i<ntuples; i++) {
1111                 numoffset[i] = endoffsets[i] - endoffsets[i-1];
1112         }
1113         /* Check for redundant tuples by comparing oids of each tuple */
1114         for(i = 0; i < ntuples; i++) {
1115                 if(oid[i] == -1)
1116                         continue;
1117                 for(j = i+1 ; j < ntuples; j++) {
1118                         if(oid[j] == -1)
1119                                 continue;
1120                         /*If oids of tuples match */ 
1121                         if (oid[i] == oid[j]) {
1122                                 /* Find the smallest offset length of two tuples*/
1123                                 if(numoffset[i] >  numoffset[j]){
1124                                         slength = numoffset[j];
1125                                         sindex = j;
1126                                 }
1127                                 else {
1128                                         slength = numoffset[i];
1129                                         sindex = i;
1130                                 }
1131
1132                                 /* Compare the offset values based on the current indices
1133                                  * break if they do not match
1134                                  * if all offset values match then pick the largest tuple*/
1135
1136                                 if(i == 0) {
1137                                         k = 0;
1138                                         index = endoffsets[j -1];
1139                                         for(count = 0; count < slength; count ++) {
1140                                                 if (arryfields[k] != arryfields[index]) { 
1141                                                         break;
1142                                                 }
1143                                                 index++;
1144                                                 k++;
1145                                         }       
1146                                 } else {
1147                                         k = endoffsets[i-1];
1148                                         index = endoffsets[j-1];
1149                                         printf("Value of slength = %d\n", slength);
1150                                         for(count = 0; count < slength; count++) {
1151                                                 if(arryfields[k] != arryfields[index]) {
1152                                                         break;
1153                                                 }
1154                                                 index++;
1155                                                 k++;
1156                                         }
1157                                 }
1158
1159                                 if(slength == count) {
1160                                         oid[sindex] = -1;
1161                                 }
1162                         }
1163                 }
1164         }
1165 }
1166
1167 void checkPreCache(prefetchqelem_t *node, int *numoffset, int counter, int loopcount, unsigned int objoid, int index, int iter, int oidnfound) {
1168         char *ptr, *tmp;
1169         int ntuples, i, k, flag;
1170         unsigned int * oid;
1171         short *endoffsets, *arryfields;
1172         objheader_t *header;
1173
1174         ptr = (char *) node;
1175         ntuples = *(GET_NTUPLES(ptr));
1176         oid = GET_PTR_OID(ptr);
1177         endoffsets = GET_PTR_EOFF(ptr, ntuples);
1178         arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1179
1180         if(oidnfound == 1) {
1181                 if((header = (objheader_t *) prehashSearch(objoid)) == NULL) {
1182                         return;
1183                 } else { //Found in Prefetch Cache
1184                         //TODO Decide if object is too old, if old remove from cache
1185                         tmp = (char *) header;
1186                         /* Check if any of the offset oid is available in the Prefetch cache */
1187                         for(i = counter; i < loopcount; i++) {
1188                                 objoid = *(tmp + sizeof(objheader_t) + arryfields[counter]);
1189                                 if((header = (objheader_t *)prehashSearch(objoid)) != NULL) {
1190                                         flag = 0;
1191                                 } else {
1192                                         flag = 1;
1193                                         break;
1194                                 }
1195                         }
1196                 }
1197         } else {
1198                 for(i = counter; i<loopcount; i++) {
1199                         if((header = (objheader_t *)prehashSearch(objoid)) != NULL) {
1200                                 tmp = (char *) header;
1201                                 objoid = *(tmp + sizeof(objheader_t) + arryfields[index]);
1202                                 flag = 0;
1203                                 index++;
1204                         } else {
1205                                 flag = 1;
1206                                 break;
1207                         }
1208                 }
1209         }
1210
1211         /* If oid not found locally or in prefetch cache then 
1212          * assign the latest oid found as the new oid 
1213          * and copy left over offsets into the arrayoffsetfieldarray*/
1214         oid[iter] = objoid;
1215         numoffset[iter] = numoffset[iter] - (i+1);
1216         for(k = 0; k < numoffset[iter] ; k++) {
1217                 arryfields[endoffsets[counter]+k] = arryfields[endoffsets[counter]+k+1];
1218         }
1219
1220         if(flag == 0) {
1221                 oid[iter] = -1;
1222                 numoffset[iter] = 0;
1223         }
1224 }
1225
1226 /* This function makes machine piles to be added into the machine pile queue for each prefetch call */
1227 prefetchpile_t *makePreGroups(prefetchqelem_t *node, int *numoffset) {
1228         char *ptr, *tmp;
1229         int ntuples, slength, i, machinenum;
1230         int maxoffset;
1231         unsigned int *oid;
1232         short *endoffsets, *arryfields, *offset; 
1233         prefetchpile_t *head = NULL;
1234
1235         /* Check for the case x.y.z and a.b.c are same oids */ 
1236         ptr = (char *) node;
1237         ntuples = *(GET_NTUPLES(ptr));
1238         oid = GET_PTR_OID(ptr);
1239         endoffsets = GET_PTR_EOFF(ptr, ntuples); 
1240         arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1241
1242         /* Check for redundant tuples by comparing oids of each tuple */
1243         for(i = 0; i < ntuples; i++) {
1244                 if(oid[i] == -1)
1245                         continue;
1246                 /* For each tuple make piles */
1247                 if ((machinenum = lhashSearch(oid[i])) == 0) {
1248                         printf("Error: No such Machine %s, %d\n", __FILE__, __LINE__);
1249                         return NULL;
1250                 }
1251                 /* Insert into machine pile */
1252                 offset = &arryfields[endoffsets[i-1]];
1253                 insertPile(machinenum, oid[i], numoffset[i], offset, &head);
1254         }
1255         return head;
1256 }
1257
1258
1259 /* This function checks if the oids within the prefetch tuples are available locally.
1260  * If yes then makes the tuple invalid. If no then rearranges oid and offset values in 
1261  * the prefetchqelem_t node to represent a new prefetch tuple */
1262 prefetchpile_t *foundLocal(prefetchqelem_t *node) {
1263         int ntuples,i, j, k, oidnfound = 0, index, flag;
1264         unsigned int *oid;
1265         unsigned int  objoid;
1266         char *ptr, *tmp;
1267         objheader_t *objheader;
1268         short *endoffsets, *arryfields; 
1269         prefetchpile_t *head = NULL;
1270
1271         ptr = (char *) node;
1272         ntuples = *(GET_NTUPLES(ptr));
1273         oid = GET_PTR_OID(ptr);
1274         endoffsets = GET_PTR_EOFF(ptr, ntuples); 
1275         arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1276         /* Find offset length for each tuple */
1277         int numoffset[ntuples];//Number of offsets for each tuple
1278         numoffset[0] = endoffsets[0];
1279         for(i = 1; i<ntuples; i++) {
1280                 numoffset[i] = endoffsets[i] - endoffsets[i-1];
1281         }
1282         for(i = 0; i < ntuples; i++) { 
1283                 if(oid[i] == -1)
1284                         continue;
1285                 /* If object found locally */
1286                 if((objheader = (objheader_t*) mhashSearch(oid[i])) != NULL) { 
1287                         oidnfound = 0;
1288                         tmp = (char *) objheader;
1289                         /* Find the oid of its offset value */
1290                         if(i == 0) 
1291                                 index = 0;
1292                         else 
1293                                 index = endoffsets[i - 1];
1294                         for(j = 0 ; j < numoffset[i] ; j++) {
1295                                 objoid = *(tmp + sizeof(objheader_t) + arryfields[index]);
1296                                 /*If oid found locally then 
1297                                  *assign the latest oid found as the new oid 
1298                                  *and copy left over offsets into the arrayoffsetfieldarray*/
1299                                 oid[i] = objoid;
1300                                 numoffset[i] = numoffset[i] - (j+1);
1301                                 for(k = 0; k < numoffset[i]; k++)
1302                                         arryfields[endoffsets[j]+ k] = arryfields[endoffsets[j]+k+1];
1303                                 index++;
1304                                 /*New offset oid not found */
1305                                 if((objheader = (objheader_t*) mhashSearch(objoid)) == NULL) {
1306                                         flag = 1;
1307                                         checkPreCache(node, numoffset, j, numoffset[i], objoid, index, i, oidnfound); 
1308                                         break;
1309                                 } else 
1310                                         flag = 0;
1311                         }
1312
1313                         /*If all offset oids are found locally,make the prefetch tuple invalid */
1314                         if(flag == 0) {
1315                                 oid[i] = -1;
1316                                 numoffset[i] = 0;
1317                         }
1318                 } else {
1319                         oidnfound = 1;
1320                         /* Look in Prefetch cache */
1321                         checkPreCache(node, numoffset, 0, numoffset[i], oid[i], 0, i, oidnfound); 
1322                 }
1323
1324         }
1325         /* Make machine groups */
1326         head = makePreGroups(node, numoffset);
1327         return head;
1328 }
1329
1330 /* This function is called by the thread calling transPrefetch */
1331 void *transPrefetch(void *t) {
1332         prefetchqelem_t *qnode;
1333         prefetchpile_t *pilehead = NULL;
1334
1335         while(1) {
1336                 /* lock mutex of primary prefetch queue */
1337                 pthread_mutex_lock(&pqueue.qlock);
1338                 /* while primary queue is empty, then wait */
1339                 while((pqueue.front == NULL) && (pqueue.rear == NULL)) {
1340                         pthread_cond_wait(&pqueue.qcond, &pqueue.qlock);
1341                 }
1342
1343                 /* dequeue node to create a machine piles and  finally unlock mutex */
1344                 if((qnode = pre_dequeue()) == NULL) {
1345                         printf("Error: No node returned %s, %d\n", __FILE__, __LINE__);
1346                         pthread_mutex_unlock(&pqueue.qlock);
1347                         pthread_exit(NULL);
1348                 }
1349                 pthread_mutex_unlock(&pqueue.qlock);
1350                 /* Reduce redundant prefetch requests */
1351                 checkPrefetchTuples(qnode);
1352                 /* Check if the tuples are found locally, if yes then reduce them further*/ 
1353                 /* and group requests by remote machine ids by calling the makePreGroups() */
1354                 pilehead = foundLocal(qnode);
1355
1356                 /* Lock mutex of pool queue */
1357                 pthread_mutex_lock(&mcqueue.qlock);
1358                 /* Update the pool queue with the new remote machine piles generated per prefetch call */
1359                 mcpileenqueue(pilehead);
1360                 /* Broadcast signal on machine pile queue */
1361                 pthread_cond_broadcast(&mcqueue.qcond);
1362                 /* Unlock mutex of  machine pile queue */
1363                 pthread_mutex_unlock(&mcqueue.qlock);
1364                 /* Deallocate the prefetch queue pile node */
1365                 predealloc(qnode);
1366
1367         }
1368 }
1369
1370 /* Each thread in the  pool of threads calls this function to establish connection with
1371  * remote machines, send the prefetch requests and process the reponses from
1372  * the remote machines .
1373  * The thread is active throughout the period of runtime */
1374
1375 void *mcqProcess(void *threadid) {
1376         int tid;
1377         prefetchpile_t *mcpilenode;
1378
1379         tid = (int) threadid;
1380         while(1) {
1381                 /* Lock mutex of mc pile queue */
1382                 pthread_mutex_lock(&mcqueue.qlock);
1383                 /* When mc pile queue is empty, wait */
1384                 while((mcqueue.front == NULL) && (mcqueue.rear == NULL)) {
1385                         pthread_cond_wait(&mcqueue.qcond, &mcqueue.qlock);
1386                 }
1387                 /* Dequeue node to send remote machine connections*/
1388                 if((mcpilenode = mcpiledequeue()) == NULL) {
1389                         printf("Dequeue Error: No node returned %s %d\n", __FILE__, __LINE__);
1390                         pthread_mutex_unlock(&mcqueue.qlock);
1391                         pthread_exit(NULL);
1392                 }
1393                 /* Unlock mutex */
1394                 pthread_mutex_unlock(&mcqueue.qlock);
1395
1396                 /*Initiate connection to remote host and send request */ 
1397                 /* Process Request */
1398                 sendPrefetchReq(mcpilenode, tid);
1399
1400                 /* Deallocate the machine queue pile node */
1401                 mcdealloc(mcpilenode);
1402         }
1403 }
1404
1405 void sendPrefetchReq(prefetchpile_t *mcpilenode, int threadid) {
1406         int sd, i, offset, off, len, endpair, count = 0;
1407         struct sockaddr_in serv_addr;
1408         struct hostent *server;
1409         char machineip[16], control;
1410         objpile_t *tmp;
1411
1412
1413         /* Send Trans Prefetch Request */
1414         if ((sd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
1415                 perror("Error in socket for TRANS_REQUEST\n");
1416                 return;
1417         }
1418         bzero((char*) &serv_addr, sizeof(serv_addr));
1419         serv_addr.sin_family = AF_INET;
1420         serv_addr.sin_port = htons(LISTEN_PORT);
1421         //serv_addr.sin_addr.s_addr = inet_addr(MACHINE_IP);
1422         midtoIP(mcpilenode->mid ,machineip);
1423         machineip[15] = '\0';
1424         serv_addr.sin_addr.s_addr = inet_addr(machineip);
1425
1426         /* Open Connection */
1427         if (connect(sd, (struct sockaddr *) &serv_addr, sizeof(struct sockaddr)) < 0) {
1428                 perror("Error in connect for TRANS_REQUEST\n");
1429                 close(sd);
1430                 return;
1431         }
1432
1433         /* Send TRANS_PREFETCH control message */
1434         control = TRANS_PREFETCH;
1435         if(send(sd, &control, sizeof(char), MSG_NOSIGNAL) < sizeof(char)) {
1436                 perror("Error in sending prefetch control\n");
1437                 close(sd);
1438                 return;
1439         }
1440
1441         /* Send Oids and offsets in pairs */
1442         tmp = mcpilenode->objpiles;
1443         while(tmp != NULL) {
1444                 off = offset = 0;
1445                 count++;  /* Keeps track of the number of oid and offset tuples sent per remote machine */
1446                 len = sizeof(int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1447                 char oidnoffset[len];
1448                 memcpy(oidnoffset, &len, sizeof(int));
1449                 off = sizeof(int);
1450                 memcpy(oidnoffset + off, &tmp->oid, sizeof(unsigned int));
1451                 off += sizeof(unsigned int);
1452                 for(i = 0; i < tmp->numoffset; i++) {
1453                         memcpy(oidnoffset + off, &tmp->offset[i], sizeof(short));
1454                         off+=sizeof(short);
1455                 }
1456                 if (send(sd, &oidnoffset, sizeof(oidnoffset),MSG_NOSIGNAL) < sizeof(oidnoffset)) {
1457                         perror("Error sending fixed bytes for thread\n");
1458                         close(sd);
1459                         return;
1460                 }
1461                 tmp = tmp->next;
1462         }
1463
1464         /* Send a special char -1 to represent the end of sending oids + offset pair to remote machine */
1465         endpair = -1;
1466         if (send(sd, &endpair, sizeof(int), MSG_NOSIGNAL) < sizeof(int)) {
1467                 perror("Error sending fixed bytes for thread\n");
1468                 close(sd);
1469                 return;
1470         }
1471
1472         /* Get Response from the remote machine */
1473         getPrefetchResponse(count,sd);
1474         close(sd);
1475         return;
1476 }
1477
1478 void getPrefetchResponse(int count, int sd) {
1479         int i = 0, val, n, N, sum, index, objsize;
1480         unsigned int bufsize,oid;
1481         char buffer[RECEIVE_BUFFER_SIZE], control;
1482         char *ptr;
1483         void *modptr, *oldptr;
1484
1485         /* Read  prefetch response from the Remote machine */
1486         if((val = read(sd, &control, sizeof(char))) <= 0) {
1487                 perror("No control response for Prefetch request sent\n");
1488                 return;
1489         }
1490
1491         if(control == TRANS_PREFETCH_RESPONSE) {
1492                 /*For each oid and offset tuple sent as prefetch request to remote machine*/
1493                 while(i < count) {
1494                         sum = 0;
1495                         index = 0;
1496                         /* Read the size of buffer to be received */
1497                         if((N = read(sd, buffer, sizeof(unsigned int))) <= 0) {
1498                                 perror("Size of buffer not recv\n");
1499                                 return;
1500                         }
1501                         memcpy(&bufsize, buffer, sizeof(unsigned int));
1502                         ptr = buffer + sizeof(unsigned int);
1503                         /* Keep receiving the buffer containing oid info */ 
1504                         do {
1505                                 n = recv((int)sd, (void *)ptr+sum, bufsize-sum, 0);
1506                                 sum +=n;
1507                         } while(sum < bufsize && n != 0);
1508                         /* Decode the contents of the buffer */
1509                         index = sizeof(unsigned int);
1510                         while(index < (bufsize - sizeof(unsigned int))) {
1511                                 if(buffer[index] == OBJECT_FOUND) {
1512                                         /* Increment it to get the object */
1513                                         index += sizeof(char);
1514                                         memcpy(&oid, buffer + index, sizeof(unsigned int));
1515                                         index += sizeof(unsigned int);
1516                                         /* For each object found add to Prefetch Cache */
1517                                         memcpy(&objsize, buffer + index, sizeof(int));
1518                                         index+=sizeof(int);
1519                                         pthread_mutex_lock(&prefetchcache_mutex);
1520                                         if ((modptr = objstrAlloc(prefetchcache, objsize)) == NULL) {
1521                                                 printf("objstrAlloc error for copying into prefetch cache %s, %d\n", __FILE__, __LINE__);
1522                                                 pthread_mutex_unlock(&prefetchcache_mutex);
1523                                                 return;
1524                                         }
1525                                         pthread_mutex_unlock(&prefetchcache_mutex);
1526                                         memcpy(modptr, buffer+index, objsize);
1527                                         index += objsize;
1528                                         /* Insert the oid and its address into the prefetch hash lookup table */
1529                                         /* Do a version comparison if the oid exists */
1530                                         if((oldptr = prehashSearch(oid)) != NULL) {
1531                                                 /* If older version then update with new object ptr */
1532                                                 if(((objheader_t *)oldptr)->version < ((objheader_t *)modptr)->version) {
1533                                                         prehashRemove(oid);
1534                                                         prehashInsert(oid, modptr);
1535                                                 } else if(((objheader_t *)oldptr)->version == ((objheader_t *)modptr)->version) { 
1536                                                         /* Add the new object ptr to hash table */
1537                                                         prehashRemove(oid);
1538                                                         prehashInsert(oid, modptr);
1539                                                 } else { /* Do nothing: TODO modptr should be reference counted */
1540                                                         ;
1541                                                 }
1542                                         } else {/*If doesn't no match found in hashtable, add the object ptr to hash table*/
1543                                                 prehashInsert(oid, modptr);
1544                                         }
1545                                         /* Lock the Prefetch Cache look up table*/
1546                                         //pthread_mutex_lock(&pflookup.lock);
1547                                         /* Broadcast signal on prefetch cache condition variable */ 
1548                                         pthread_cond_broadcast(&pflookup.cond);
1549                                         /* Unlock the Prefetch Cache look up table*/
1550                                         //pthread_mutex_unlock(&pflookup.lock);
1551                                 } else if(buffer[index] == OBJECT_NOT_FOUND) {
1552                                         /* Increment it to get the object */
1553                                         /* TODO: For each object not found query DHT for new location and retrieve the object */
1554                                         index += sizeof(char);
1555                                         memcpy(&oid, buffer + index, sizeof(unsigned int));
1556                                         index += sizeof(unsigned int);
1557                                         /* Throw an error */
1558                                         printf("OBJECT NOT FOUND.... THIS SHOULD NOT HAPPEN...TERMINATE PROGRAM\n");
1559                                         exit(-1);
1560                                 } else {
1561                                         printf("Error in decoding the index value %s, %d\n",__FILE__, __LINE__);
1562                                         return;
1563                                 }
1564                         }
1565
1566                         i++;
1567                 }
1568         } else
1569                 printf("Error in receving response for prefetch request %s, %d\n",__FILE__, __LINE__);
1570         return;
1571 }
1572
1573 unsigned short getObjType(unsigned int oid)
1574 {
1575         objheader_t *objheader;
1576         unsigned short numoffsets = 0;
1577
1578         if ((objheader = (objheader_t *) mhashSearch(oid)) == NULL)
1579         {
1580                 if ((objheader = (objheader_t *) prehashSearch(oid)) == NULL)
1581                 {
1582                         prefetch(1, &oid, &numoffsets, NULL);
1583                         pthread_mutex_lock(&pflookup.lock);
1584                         while ((objheader = (objheader_t *) prehashSearch(oid)) == NULL)
1585                         {
1586                                 pthread_cond_wait(&pflookup.cond, &pflookup.lock);
1587                         }
1588                         pthread_mutex_unlock(&pflookup.lock);
1589                 }
1590         }
1591
1592         return TYPE(objheader);
1593 }
1594
1595 int startRemoteThread(unsigned int oid, unsigned int mid)
1596 {
1597         int sock;
1598         struct sockaddr_in remoteAddr;
1599         char msg[1 + sizeof(unsigned int)];
1600         int bytesSent;
1601         int status;
1602
1603         if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
1604         {
1605                 perror("startRemoteThread():socket()");
1606                 return -1;
1607         }
1608
1609         bzero(&remoteAddr, sizeof(remoteAddr));
1610         remoteAddr.sin_family = AF_INET;
1611         remoteAddr.sin_port = htons(LISTEN_PORT);
1612         remoteAddr.sin_addr.s_addr = htonl(mid);
1613         
1614         if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0)
1615         {
1616                 printf("startRemoteThread():error %d connecting to %s:%d\n", errno,
1617                         inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
1618                 status = -1;
1619         }
1620         else
1621         {
1622                 msg[0] = START_REMOTE_THREAD;
1623                 memcpy(&msg[1], &oid, sizeof(unsigned int));
1624
1625                 bytesSent = send(sock, msg, 1 + sizeof(unsigned int), 0);
1626                 if (bytesSent < 0)
1627                 {
1628                         perror("startRemoteThread():send()");
1629                         status = -1;
1630                 }
1631                 else if (bytesSent != 1 + sizeof(unsigned int))
1632                 {
1633                         printf("startRemoteThread(): error, sent %d bytes\n", bytesSent);
1634                         status = -1;
1635                 }
1636                 else
1637                 {
1638                         status = 0;
1639                 }
1640         }
1641
1642         close(sock);
1643         return status;
1644 }
1645
1646 //TODO: when reusing oids, make sure they are not already in use!
1647 unsigned int getNewOID(void) {
1648         static unsigned int id = 0xFFFFFFFF;
1649         
1650         id += 2;
1651         if (id > oidMax || id < oidMin)
1652         {
1653                 id = (oidMin | 1);
1654         }
1655         return id;
1656 }
1657
1658 int processConfigFile()
1659 {
1660         FILE *configFile;
1661         const int maxLineLength = 200;
1662         char lineBuffer[maxLineLength];
1663         char *token;
1664         const char *delimiters = " \t\n";
1665         char *commentBegin;
1666         in_addr_t tmpAddr;
1667         
1668         configFile = fopen(CONFIG_FILENAME, "r");
1669         if (configFile == NULL)
1670         {
1671                 printf("error opening %s:\n", CONFIG_FILENAME);
1672                 perror("");
1673                 return -1;
1674         }
1675
1676         numHostsInSystem = 0;
1677         sizeOfHostArray = 8;
1678         hostIpAddrs = calloc(sizeOfHostArray, sizeof(unsigned int));
1679         
1680         while(fgets(lineBuffer, maxLineLength, configFile) != NULL)
1681         {
1682                 commentBegin = strchr(lineBuffer, '#');
1683                 if (commentBegin != NULL)
1684                         *commentBegin = '\0';
1685                 token = strtok(lineBuffer, delimiters);
1686                 while (token != NULL)
1687                 {
1688                         tmpAddr = inet_addr(token);
1689                         if ((int)tmpAddr == -1)
1690                         {
1691                                 printf("error in %s: bad token:%s\n", CONFIG_FILENAME, token);
1692                                 fclose(configFile);
1693                                 return -1;
1694                         }
1695                         else
1696                                 addHost(htonl(tmpAddr));
1697                         token = strtok(NULL, delimiters);
1698                 }
1699         }
1700
1701         fclose(configFile);
1702         
1703         if (numHostsInSystem < 1)
1704         {
1705                 printf("error in %s: no IP Adresses found\n", CONFIG_FILENAME);
1706                 return -1;
1707         }
1708 #ifdef MAC
1709         myIpAddr = getMyIpAddr("en1");
1710 #else
1711         myIpAddr = getMyIpAddr("eth0");
1712 #endif
1713         myIndexInHostArray = findHost(myIpAddr);
1714         if (myIndexInHostArray == -1)
1715         {
1716                 printf("error in %s: IP Address of eth0 not found\n", CONFIG_FILENAME);
1717                 return -1;
1718         }
1719         oidsPerBlock = (0xFFFFFFFF / numHostsInSystem) + 1;
1720         oidMin = oidsPerBlock * myIndexInHostArray;
1721         if (myIndexInHostArray == numHostsInSystem - 1)
1722                 oidMax = 0xFFFFFFFF;
1723         else
1724                 oidMax = oidsPerBlock * (myIndexInHostArray + 1) - 1;
1725
1726         return 0;
1727 }
1728
1729 void addHost(unsigned int hostIp)
1730 {
1731         unsigned int *tmpArray;
1732
1733         if (findHost(hostIp) != -1)
1734                 return;
1735
1736         if (numHostsInSystem == sizeOfHostArray)
1737         {
1738                 tmpArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
1739                 memcpy(tmpArray, hostIpAddrs, sizeof(unsigned int) * numHostsInSystem);
1740                 free(hostIpAddrs);
1741                 hostIpAddrs = tmpArray;
1742         }
1743
1744         hostIpAddrs[numHostsInSystem++] = hostIp;
1745
1746         return;
1747 }
1748
1749 int findHost(unsigned int hostIp)
1750 {
1751         int i;
1752         for (i = 0; i < numHostsInSystem; i++)
1753                 if (hostIpAddrs[i] == hostIp)
1754                         return i;
1755
1756         //not found
1757         return -1;
1758 }
1759