--- /dev/null
+1. fix backup designations
+2. handle the case of only 1 machine left in the system
+
--- /dev/null
+#include "clookup.h"
+#include "abortreaders.h"
+
+chashtable_t * aborttable; /* maps an oid to the head of its struct readerlist chain */
+pthread_mutex_t aborttablelock; /* held by the add/remove functions below while they touch aborttable or freelist */
+struct readerlist *freelist; /* recycled struct readerlist nodes, linked through ->next */
+
+/* Prepare the reader-tracking state: an empty free list, the lock, and an
+ * empty oid -> readerlist table of CHASH_SIZE buckets. */
+void initreaderlist() {
+  freelist = NULL;
+  pthread_mutex_init(&aborttablelock, NULL);
+  aborttable = chashCreate(CHASH_SIZE, CLOADFACTOR);
+}
+
+/* Record &t_abort in the reader chain of object `oid`, so that a later
+ * removetransaction() on that oid sets the flag.
+ *
+ * Best effort: nothing is recorded when aborttablelock is contended (the
+ * trylock fails) or when a chain node cannot be allocated.
+ *
+ * The chain is a list of fixed-size chunks; the entry goes into the first
+ * chunk that still has a free (NULL) slot, and a new chunk is appended when
+ * every existing chunk is full. */
+void addtransaction(unsigned int oid) {
+  struct readerlist * rl;
+  int i;
+  if (pthread_mutex_trylock(&aborttablelock)!=0)
+    return;
+  rl=(struct readerlist *)chashSearch(aborttable, oid);
+  if (rl==NULL) {
+    if (freelist==NULL) {
+      rl=calloc(1,sizeof(struct readerlist ));
+      if (rl==NULL) {
+        pthread_mutex_unlock(&aborttablelock);
+        printf("Calloc error %s %d\n", __FILE__, __LINE__);
+        return;
+      }
+    } else {
+      //Reuse a recycled node; it may still hold stale slots, so wipe it
+      rl=freelist;
+      freelist=rl->next;
+      memset(rl,0, sizeof(struct readerlist));
+    }
+    chashInsert(aborttable, oid, rl);
+  }
+  //Advance to the first chunk with room, growing the chain if needed
+  while(rl->numreaders==READERSIZE) {
+    if (rl->next==NULL) {
+      rl->next=calloc(1,sizeof(struct readerlist));
+      if (rl->next==NULL) {
+        pthread_mutex_unlock(&aborttablelock);
+        printf("Calloc error %s %d\n", __FILE__, __LINE__);
+        return;
+      }
+    }
+    rl=rl->next;
+  }
+  for(i=0;i<READERSIZE;i++) {
+    if (rl->array[i]==NULL) {
+      rl->array[i]=&t_abort;
+      //Count the reader only once a slot has really been claimed
+      rl->numreaders++;
+      pthread_mutex_unlock(&aborttablelock);
+      return;
+    }
+  }
+  //Unreachable while numreaders matches the number of occupied slots
+  pthread_mutex_unlock(&aborttablelock);
+  printf("ERROR in addtransaction\n");
+}
+
+/* Abort every registered reader of the given objects.
+ *
+ * oidarray - oids whose readers must be aborted
+ * numoids  - number of entries in oidarray
+ *
+ * For each oid the whole reader chain is detached from aborttable, every
+ * registered abort flag in it is set to 1, and the chain's chunks are pushed
+ * onto freelist for reuse. Oids with no registered readers are skipped. */
+void removetransaction(unsigned int oidarray[], unsigned int numoids) {
+  unsigned int i;
+  pthread_mutex_lock(&aborttablelock);
+  for(i=0;i<numoids;i++) {
+    unsigned int oid=oidarray[i];
+    struct readerlist *rl=chashRemove2(aborttable, oid);
+    struct readerlist *tmp;
+    while(rl!=NULL) {
+      int count=rl->numreaders;
+      int j;
+      //Stop at the end of the slot array even if numreaders overstates
+      //the number of occupied slots
+      for(j=0;count>0 && j<READERSIZE;j++) {
+        int *flag=rl->array[j];
+        if (flag!=NULL) {
+          *flag=1; //It's okay to set our own abort flag...it is
+                   //too late to abort us
+          count--;
+        }
+      }
+      //Recycle this chunk and continue with the rest of the chain
+      tmp=rl;
+      rl=rl->next;
+      tmp->next=freelist;
+      freelist=tmp;
+    }
+  }
+  pthread_mutex_unlock(&aborttablelock);
+}
+
+void removethisreadtransaction(unsigned char* oidverread, unsigned int numoids) { /* Clear one
+ * &t_abort registration for each of the numoids oids stored in oidverread.
+ * oidverread is walked as packed records: the oid is read from the start of
+ * each record and the cursor then advances by
+ * sizeof(unsigned int)+sizeof(unsigned short) bytes.
+ * A chunk whose numreaders drops to zero is unlinked and pushed onto freelist.
+ * NOTE(review): the oid is read through an unsigned int * cast at that stride,
+ * which may be a misaligned access on some targets -- confirm. */
+ int i,j;
+ pthread_mutex_lock(&aborttablelock);
+ for(i=0;i<numoids;i++) {
+ unsigned int oid=*((unsigned int *)oidverread);
+ struct readerlist * rl=chashSearch(aborttable, oid);
+ struct readerlist *first=rl; /* head chunk at first; becomes the previous chunk as the walk advances */
+ oidverread+=(sizeof(unsigned int)+sizeof(unsigned short));
+ while(rl!=NULL) {
+ for(j=0;j<READERSIZE;j++) {
+ if (rl->array[j]==&t_abort) {
+ rl->array[j]=NULL;
+ if ((--rl->numreaders)==0) {
+ if (first==rl) { /* emptied chunk is the head: re-key the table to the rest of the chain */
+ chashRemove2(aborttable, oid);
+ if (rl->next!=NULL)
+ chashInsert(aborttable, oid, rl->next);
+ rl->next=freelist;
+ freelist=rl;
+ } else { /* emptied chunk is not the head: splice it out after its predecessor */
+ first->next=rl->next;
+ rl->next=freelist;
+ freelist=rl;
+ }
+ }
+ goto nextitem; /* one slot cleared for this oid; move on to the next record */
+ }
+ }
+ first=rl;
+ rl=rl->next;
+ }
+ nextitem:
+ ;
+ }
+ pthread_mutex_unlock(&aborttablelock);
+}
+
+void removetransactionhash() { /* For every key stored in c_table (c_size buckets and
+ * their chains), clear one &t_abort registration from that oid's reader chain
+ * in aborttable. A chunk whose numreaders drops to zero is unlinked and pushed
+ * onto freelist. A key of 0 ends the walk of the current bucket. */
+ chashlistnode_t *ptr=c_table;
+ int i,j;
+ pthread_mutex_lock(&aborttablelock);
+ for(i=0;i<c_size;i++) {
+ chashlistnode_t *curr=&ptr[i];
+ do {
+ unsigned int oid=curr->key;
+ if (oid==0)
+ break;
+ struct readerlist * rl=chashSearch(aborttable, oid);
+ struct readerlist *first=rl; /* head chunk at first; becomes the previous chunk as the walk advances */
+ while(rl!=NULL) {
+ for(j=0;j<READERSIZE;j++) {
+ if (rl->array[j]==&t_abort) {
+ rl->array[j]=NULL;
+ if ((--rl->numreaders)==0) {
+ if (first==rl) { /* emptied chunk is the head: re-key the table to the rest of the chain */
+ chashRemove2(aborttable, oid);
+ if (rl->next!=NULL)
+ chashInsert(aborttable, oid, rl->next);
+ rl->next=freelist;
+ freelist=rl;
+ } else { /* emptied chunk is not the head: splice it out after its predecessor */
+ first->next=rl->next;
+ rl->next=freelist;
+ freelist=rl;
+ }
+ }
+ goto nextitem; /* one slot cleared for this oid; continue with the next table entry */
+ }
+ }
+ first=rl;
+ rl=rl->next;
+ }
+ nextitem:
+ curr=curr->next;
+ } while(curr!=NULL);
+ }
+ pthread_mutex_unlock(&aborttablelock);
+}
+
+
+void removethistransaction(unsigned int oidarray[], unsigned int numoids) { /* Clear one
+ * &t_abort registration for each of the numoids oids in oidarray.
+ * A chunk whose numreaders drops to zero is unlinked from the oid's chain and
+ * pushed onto freelist. */
+ int i,j;
+ pthread_mutex_lock(&aborttablelock);
+ for(i=0;i<numoids;i++) {
+ unsigned int oid=oidarray[i];
+ struct readerlist * rl=chashSearch(aborttable, oid);
+
+ struct readerlist *first=rl; /* head chunk at first; becomes the previous chunk as the walk advances */
+ while(rl!=NULL) {
+ for(j=0;j<READERSIZE;j++) {
+ if (rl->array[j]==&t_abort) {
+ rl->array[j]=NULL;
+ if ((--rl->numreaders)==0) {
+ if (first==rl) { /* emptied chunk is the head: re-key the table to the rest of the chain */
+ chashRemove2(aborttable, oid);
+ if (rl->next!=NULL)
+ chashInsert(aborttable, oid, rl->next);
+ rl->next=freelist;
+ freelist=rl;
+ } else { /* emptied chunk is not the head: splice it out after its predecessor */
+ first->next=rl->next;
+ rl->next=freelist;
+ freelist=rl;
+ }
+ }
+ goto nextitem; /* one slot cleared for this oid; move on to the next oid */
+ }
+ }
+ first=rl;
+ rl=rl->next;
+ }
+ nextitem:
+ ;
+ }
+ pthread_mutex_unlock(&aborttablelock);
+}
+
--- /dev/null
+#ifndef ABORTREADERS_H
+#define ABORTREADERS_H
+#include "dstm.h"
+
+#define READERSIZE 8
+
+struct readerlist { /* one fixed-size chunk in a per-oid chain of registered readers */
+ int *array[READERSIZE]; /* registered abort-flag pointers; NULL marks a free slot */
+ int numreaders; /* count of occupied slots in array */
+ struct readerlist * next; /* next chunk of the chain (or next node while on the free list) */
+};
+
+void initreaderlist();
+void addtransaction(unsigned int oid);
+void removetransaction(unsigned int oidarray[], unsigned int numoids);
+void removethistransaction(unsigned int oidarray[], unsigned int numoids);
+void removethisreadtransaction(unsigned char* oidverread, unsigned int numoids);
+void removetransactionhash();
+#endif
--- /dev/null
+#include "addPrefetchEnhance.h"
+#include "prelookup.h"
+
+extern int numprefetchsites; // Number of prefetch sites
+extern pfcstats_t *evalPrefetch; //Global array that keeps track of operation mode (ON/OFF) for each prefetch site
+extern objstr_t *prefetchcache; //Global Prefetch cache
+extern pthread_mutex_t prefetchcache_mutex; //Mutex to lock Prefetch Cache
+extern unsigned int myIpAddr;
+
+/* Build the array behind the evalPrefetch global: one pfcstats_t per
+ * prefetch site (numprefetchsites of them), each switched on with its
+ * counters at their starting values.
+ * Returns NULL after logging when the allocation fails. */
+pfcstats_t *initPrefetchStats() {
+  pfcstats_t *stats = calloc(numprefetchsites, sizeof(pfcstats_t));
+  if(stats == NULL) {
+    printf("%s() Calloc error in %s at line %d\n", __func__, __FILE__, __LINE__);
+    return NULL;
+  }
+  /* Every site starts with prefetching enabled */
+  int site = 0;
+  while(site < numprefetchsites) {
+    pfcstats_t *entry = &stats[site];
+    entry->operMode = 1;
+    entry->callcount = 0;
+    entry->retrycount = RETRYINTERVAL; //N
+    entry->uselesscount = SHUTDOWNINTERVAL; //M
+    site++;
+  }
+  return stats;
+}
+
+int getRetryCount(int siteid) { /* returns the retrycount field of prefetch site siteid; siteid is not range-checked */
+ return evalPrefetch[siteid].retrycount;
+}
+
+int getUselessCount(int siteid) { /* returns the uselesscount field of prefetch site siteid; siteid is not range-checked */
+ return evalPrefetch[siteid].uselesscount;
+}
+
+char getOperationMode(int siteid) { /* returns the operMode field of prefetch site siteid; siteid is not range-checked */
+ return evalPrefetch[siteid].operMode;
+}
+
+/* Adjust the counters and on/off mode of prefetch site `siteid` at runtime.
+ *
+ * numLocal < ntuples: the site is (re)enabled and its useless counter is
+ *   reset to SHUTDOWNINTERVAL.
+ * otherwise: if the site is currently on, its useless counter is
+ *   decremented and the site is switched off once the counter reaches
+ *   zero or below. A site that is already off is left untouched. */
+void handleDynPrefetching(int numLocal, int ntuples, int siteid) {
+  pfcstats_t *site = &evalPrefetch[siteid];
+
+  if(numLocal < ntuples) {
+    /* prefetch not found locally(miss in cache) */
+    site->operMode = 1;
+    site->uselesscount = SHUTDOWNINTERVAL;
+    return;
+  }
+  if(site->operMode == 0)
+    return;
+  if(--site->uselesscount <= 0)
+    site->operMode = 0;
+}
+
+#if 1
+/* Walk every entry of the c_table lookup table and evict from the
+ * prefetch cache each oid that mhashSearch() does not find but
+ * prehashSearch() does. A key of 0 ends the walk of a bucket. */
+void cleanPCache() {
+  unsigned int size = c_size;
+  chashlistnode_t *buckets = c_table;
+  int i;
+  for(i = 0; i < size; i++) {
+    chashlistnode_t *entry;
+    for(entry = &buckets[i]; entry != NULL && entry->key != 0; entry = entry->next) {
+      /* Not found in local machine's object store and found in prefetch cache */
+      if(mhashSearch(entry->key) == NULL && prehashSearch(entry->key) != NULL) {
+        /* Remove from prefetch cache */
+        prehashRemove(entry->key);
+      }
+    }
+  }
+}
+#else
+/* This function clears from prefetch cache those
+ * entries that caused a transaction abort.
+ * This variant sits in the #else arm of the enclosing "#if 1" and is
+ * therefore not compiled; it treats c_table as a flat array of
+ * struct chashentry and skips slots whose key is 0. */
+void cleanPCache() {
+ unsigned int size = c_size;
+ struct chashentry *ptr = c_table;
+ int i;
+ for(i = 0; i < size; i++) {
+ struct chashentry *curr = &ptr[i]; //for each entry in the cache lookupTable
+ if(curr->key == 0)
+ continue;
+ objheader_t *header1, *header2;
+ /* Not found in local machine's object store and found in prefetch cache */
+ if((header1 = mhashSearch(curr->key)) == NULL && ((header2 = prehashSearch(curr->key)) != NULL)) {
+ /* Remove from prefetch cache */
+ prehashRemove(curr->key);
+ }
+ }
+}
+#endif
+
+/* Copy the objects a transaction touched from the transaction cache into
+ * the prefetch cache.
+ *
+ * tdata - the transaction request; f.numread / objread describe the objects
+ *         read, f.nummod / oidmod the objects modified.
+ *
+ * Read objects are copied first (type 'R'), then modified ones (type 'M').
+ * Returns 0 on success, -1 as soon as either copy fails. */
+int updatePrefetchCache(trans_req_data_t *tdata) {
+  if(tdata->f.numread > 0) {
+    if(copyToCache(tdata->f.numread, (unsigned int *)(tdata->objread), 'R') != 0) {
+      printf("%s(): Error in copying objects read at %s, %d\n", __func__, __FILE__, __LINE__);
+      return -1;
+    }
+  }
+  if(tdata->f.nummod > 0) {
+    if(copyToCache(tdata->f.nummod, tdata->oidmod, 'M') != 0) {
+      /* Report the modified set; this message used to say "objects read" */
+      printf("%s(): Error in copying objects modified at %s, %d\n", __func__, __FILE__, __LINE__);
+      return -1;
+    }
+  }
+  return 0;
+}
+
+/* Copy `numoid` objects from the transaction cache into the prefetch cache.
+ *
+ * numoid   - number of objects to copy
+ * oidarray - for oidType 'R': packed records of
+ *            sizeof(unsigned int)+sizeof(unsigned short) bytes each, with the
+ *            oid at the start of every record; otherwise a plain array of oids
+ * oidType  - 'R' for read objects, 'M' for modified objects ('M' copies get
+ *            their version bumped and their notifylist cleared)
+ *
+ * Returns 0 on success, -1 if an oid is missing from the transaction cache
+ * or the prefetch cache cannot supply memory. prefetchcache_mutex is
+ * released on every return path. */
+int copyToCache(int numoid, unsigned int *oidarray, char oidType) {
+  int i;
+  for (i = 0; i < numoid; i++) {
+    unsigned int oid;
+    if(oidType == 'R') {
+      char * objread = (char *) oidarray;
+      /* Records are packed, so the oid may not be aligned: copy it out */
+      memcpy(&oid, objread+(sizeof(unsigned int)+sizeof(unsigned short))*i, sizeof(oid));
+    } else {
+      oid = oidarray[i];
+    }
+    pthread_mutex_lock(&prefetchcache_mutex);
+    objheader_t * header;
+    if((header = (objheader_t *) t_chashSearch(oid)) == NULL) {
+      printf("%s() obj %x is no longer in transaction cache at %s , %d\n", __func__, oid,__FILE__, __LINE__);
+      fflush(stdout);
+      /* Do not leave the prefetch cache locked on this error path */
+      pthread_mutex_unlock(&prefetchcache_mutex);
+      return -1;
+    }
+    //copy into prefetch cache
+    int size;
+    GETSIZE(size, header);
+    objheader_t * newAddr;
+    if((newAddr = prefetchobjstrAlloc(size + sizeof(objheader_t))) == NULL) {
+      printf("%s(): Error in getting memory from prefetch cache at %s, %d\n", __func__,
+             __FILE__, __LINE__);
+      pthread_mutex_unlock(&prefetchcache_mutex);
+      return -1;
+    }
+    pthread_mutex_unlock(&prefetchcache_mutex);
+    memcpy(newAddr, header, size+sizeof(objheader_t));
+    //Increment version for every modified object
+    if(oidType == 'M') {
+      newAddr->version += 1;
+      newAddr->notifylist = NULL;
+    }
+    //make an entry in prefetch lookup hashtable, replacing any older one
+    if(prehashSearch(oid) != NULL) {
+      prehashRemove(oid);
+    }
+    prehashInsert(oid, newAddr);
+  } //end of for
+  return 0;
+}
--- /dev/null
+#ifndef _ADDPREFETCHENHANCE_H_
+#define _ADDPREFETCHENHANCE_H_
+
+#include "dstm.h"
+#include "mlookup.h"
+#include "gCollect.h"
+
+typedef struct prefetchCountStats {
+ int retrycount; /* keeps track of when to retry and check if we can turn on this prefetch site */
+ int uselesscount; /* keeps track of how long prefetching at this site has been useless */
+ char operMode; /* 1 = on , 0 = off */
+ int callcount; /* set to 0 by initPrefetchStats() */
+} pfcstats_t;
+
+pfcstats_t *initPrefetchStats();
+int getRetryCount(int siteid);
+int getUselessCount(int siteid);
+char getOperationMode(int);
+void handleDynPrefetching(int, int, int);
+void cleanPCache();
+int updatePrefetchCache(trans_req_data_t *);
+int copyToCache(int, unsigned int *, char);
+
+#endif
--- /dev/null
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <netinet/tcp.h>
+#include "addUdpEnhance.h"
+#include "prelookup.h"
+#ifdef ABORTREADERS
+#include "abortreaders.h"
+#endif
+
+/************************
+ * Global Variables *
+ ***********************/
+int udpSockFd;
+extern unsigned int myIpAddr;
+
+/* Create a UDP socket with SO_BROADCAST enabled.
+ * Returns the socket descriptor, or -1 (after perror) if the socket cannot
+ * be created or configured. The descriptor is closed on the failure path so
+ * it does not leak. */
+int createUdpSocket() {
+  const int on = 1;
+  int sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+
+  if(sockfd < 0) {
+    perror("socket creation failed");
+    return -1;
+  }
+  if(setsockopt(sockfd, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)) < 0) {
+    /* Report first: close() may overwrite errno */
+    perror("setsockopt - SOL_SOCKET");
+    close(sockfd);
+    return -1;
+  }
+  return sockfd;
+}
+
+int udpInit() { /* Creates the global send socket (udpSockFd, via createUdpSocket())
+ * and a second UDP socket with SO_REUSEADDR set, bound to UDP_PORT on INADDR_ANY.
+ * Returns the bound socket's descriptor; calls exit(1) if creating, configuring
+ * or binding that socket fails.
+ * NOTE(review): a createUdpSocket() failure is only logged, leaving udpSockFd
+ * negative -- confirm that later senders tolerate this. */
+ int sockfd;
+ int setsockflag = 1;
+ struct sockaddr_in servaddr;
+
+ //Create Global Udp Socket
+ if((udpSockFd = createUdpSocket()) < 0) {
+ printf("Error in socket\n");
+ }
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if(sockfd < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+ if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &setsockflag, sizeof(setsockflag)) < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+#ifdef MAC /* NOTE(review): repeats the SO_REUSEADDR call above verbatim; possibly SO_REUSEPORT was intended -- confirm */
+ if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &setsockflag, sizeof(setsockflag)) < 0) {
+ perror("socket");
+ exit(1);
+ }
+#endif
+
+ bzero(&servaddr, sizeof(servaddr));
+ servaddr.sin_family = AF_INET;
+ servaddr.sin_port = htons(UDP_PORT);
+ servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ if(bind(sockfd, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) {
+ perror("bind");
+ exit(1);
+ }
+
+ return sockfd;
+}
+
+/* Thread body that listens for udp broadcast messages.
+ *
+ * sockfd - the listening socket descriptor, passed as an integer smuggled
+ *          through the void * thread argument.
+ *
+ * Loops on recvfrom(); each datagram's leading short selects the handler
+ * (currently only INVALIDATE_OBJS). Datagrams too short to carry that field
+ * are reported and skipped. The loop ends on a receive error, after which
+ * the socket is closed and the thread exits. */
+void *udpListenBroadcast(void *sockfd) {
+  struct sockaddr_in servaddr;
+  socklen_t socklen;
+  char readBuffer[MAX_SIZE];
+  int retval;
+  int fd = (int)(long)sockfd;
+
+  printf("Listening on port %d, fd = %d\n", UDP_PORT, fd);
+
+  memset(readBuffer, 0, MAX_SIZE);
+  while(1) {
+    /* recvfrom() overwrites socklen with the sender's address length, so it
+     * has to be reset before every call */
+    socklen = sizeof(servaddr);
+    int bytesRcvd = recvfrom(fd, readBuffer, sizeof(readBuffer), 0, (struct sockaddr *)&servaddr, &socklen);
+    if(bytesRcvd == -1) {
+      printf("DEBUG-> Recv Error! \n");
+      break;
+    }
+    if(bytesRcvd < (int)sizeof(short)) {
+      printf("Error: Truncated udp message in file %s, at line %d\n", __FILE__, __LINE__);
+      continue;
+    }
+    /* Zero the tail so bytes left over from an earlier, longer datagram are
+     * never parsed as part of this one */
+    memset(readBuffer + bytesRcvd, 0, sizeof(readBuffer) - bytesRcvd);
+    short status;
+    memcpy(&status, readBuffer, sizeof(status));
+    switch (status) {
+    case INVALIDATE_OBJS:
+      if((retval = invalidateFromPrefetchCache(readBuffer))!= 0) {
+        printf("Error: In invalidateFromPrefetchCache() at %s, %d\n", __FILE__, __LINE__);
+      }
+      break;
+
+    default:
+      printf("Error: Cannot regcognize the status in file %s, at line %d\n", __FILE__, __LINE__);
+    }
+  }
+
+  /* Close connection */
+  if(close(fd) == -1)
+    perror("close");
+  pthread_exit(NULL);
+}
+
+/* Broadcast the oids a transaction modified so other machines can
+ * invalidate them.
+ * When the modified set fits in one datagram a single message is sent with
+ * iteration flag 0; otherwise it is split into messages numbered 1..k.
+ * returns -1 on error and 0 on success */
+int invalidateObj(trans_req_data_t *tdata) {
+  int maxObjsPerMsg = (MAX_SIZE - 2*sizeof(unsigned int))/sizeof(unsigned int);
+  struct sockaddr_in clientaddr;
+  int firstIter, lastIter, iter;
+
+  bzero(&clientaddr, sizeof(clientaddr));
+  clientaddr.sin_family = AF_INET;
+  clientaddr.sin_port = htons(UDP_PORT);
+  clientaddr.sin_addr.s_addr = INADDR_BROADCAST;
+
+  if(tdata->f.nummod < maxObjsPerMsg) {
+    /* send single udp msg */
+    firstIter = 0;
+    lastIter = 0;
+  } else {
+    /* Split into several udp msgs */
+    firstIter = 1;
+    lastIter = tdata->f.nummod/maxObjsPerMsg;
+    if (tdata->f.nummod%maxObjsPerMsg) lastIter++;
+  }
+
+  for(iter = firstIter; iter <= lastIter; iter++) {
+    if(sendUdpMsg(tdata, &clientaddr, iter) < 0) {
+      printf("%s() error in sending udp message at %s, %d\n", __func__, __FILE__, __LINE__);
+      return -1;
+    }
+  }
+  return 0;
+}
+
+/* Build and broadcast one invalidation datagram.
+ *
+ * tdata      - transaction whose modified oids (oidmod, f.nummod) are sent
+ * clientaddr - destination address
+ * iteration  - 0: send all modified oids in one message;
+ *              k > 0: send the k-th slice of maxObjsPerMsg oids
+ *
+ * Wire layout: short INVALIDATE_OBJS | unsigned int sender id (myIpAddr) |
+ * short payload size in bytes | payload of unsigned int oids.
+ * Only the bytes actually filled in are transmitted, so no uninitialized
+ * stack memory goes out on the network.
+ * returns -1 on error and 0 on success */
+int sendUdpMsg(trans_req_data_t *tdata, struct sockaddr_in *clientaddr, int iteration) {
+  char writeBuffer[MAX_SIZE];
+  int maxObjsPerMsg = (MAX_SIZE - 2*sizeof(unsigned int))/sizeof(unsigned int);
+  int nummod = tdata->f.nummod;
+  int offset = 0;
+  int first, numObj, i;
+  short field;
+
+  if(iteration == 0) { // iteration flag == zero, send single udp msg
+    first = 0;
+    numObj = nummod;
+  } else { // iteration flag > zero, send multiple udp msg
+    first = (iteration - 1) * maxObjsPerMsg;
+    if((nummod - (iteration * maxObjsPerMsg)) > 0)
+      numObj = maxObjsPerMsg;
+    else
+      numObj = nummod - first;
+  }
+  /* The payload must fit behind the header inside writeBuffer */
+  if(numObj < 0 || numObj > maxObjsPerMsg) {
+    printf("%s() invalid object count %d at %s, %d\n", __func__, numObj, __FILE__, __LINE__);
+    return -1;
+  }
+
+  /* Fields are copied with memcpy because they are not naturally aligned
+   * inside the buffer */
+  field = INVALIDATE_OBJS; //control msg
+  memcpy(writeBuffer+offset, &field, sizeof(field));
+  offset += sizeof(short);
+  memcpy(writeBuffer+offset, &myIpAddr, sizeof(unsigned int)); //mid sending invalidation
+  offset += sizeof(unsigned int);
+  field = (short) (sizeof(unsigned int) * numObj); //sizeof msg
+  memcpy(writeBuffer+offset, &field, sizeof(field));
+  offset += sizeof(short);
+  for(i = 0; i < numObj; i++) {
+    unsigned int oid = tdata->oidmod[first+i]; //copy objects
+    memcpy(writeBuffer+offset, &oid, sizeof(oid));
+    offset += sizeof(unsigned int);
+  }
+
+  if(sendto(udpSockFd, (const void *) writeBuffer, offset, 0, (const struct sockaddr *)clientaddr, sizeof(struct sockaddr_in)) < 0) {
+    perror("sendto error- ");
+    printf("DEBUG-> sendto error: errorno %d\n", errno);
+    return -1;
+  }
+  return 0;
+}
+
+/* Process one received invalidation message: remove every listed oid from
+ * the prefetch cache (and, with ABORTREADERS, abort its registered readers).
+ *
+ * buffer - the received message, laid out as short status | unsigned int
+ *          sender id | short payload bytes | unsigned int oids. It is assumed
+ *          to be MAX_SIZE bytes long, as the listener's receive buffer is.
+ *
+ * Messages sent by this machine itself are ignored. The payload length comes
+ * off the network, so it is rejected when negative or larger than what a
+ * MAX_SIZE buffer can hold.
+ * returns -1 on error and 0 on success */
+int invalidateFromPrefetchCache(char *buffer) {
+  const int headerBytes = 2*sizeof(short) + sizeof(unsigned int);
+  const int maxObjs = (MAX_SIZE - headerBytes)/sizeof(unsigned int);
+  int offset = sizeof(short);
+  unsigned int mid;
+  short payloadBytes;
+  int numObjsRecv;
+  int i;
+
+  /* Read mid from msg */
+  memcpy(&mid, buffer+offset, sizeof(mid));
+  offset += sizeof(unsigned int);
+  //Invalidate only if broadcast if from different machine
+  if(mid == myIpAddr)
+    return 0;
+
+  /* Read objects sent */
+  memcpy(&payloadBytes, buffer+offset, sizeof(payloadBytes));
+  offset += sizeof(short);
+  if(payloadBytes < 0)
+    return -1;
+  numObjsRecv = payloadBytes / (int)sizeof(unsigned int);
+  if(numObjsRecv > maxObjs)
+    return -1;
+#ifdef ABORTREADERS
+  removetransaction((unsigned int *)(buffer+offset), numObjsRecv);
+#endif
+  for(i = 0; i < numObjsRecv; i++) {
+    unsigned int oid;
+    memcpy(&oid, buffer+offset, sizeof(oid));
+    /* Lookup Objects in prefetch cache and remove them */
+    if(prehashSearch(oid) != NULL) {
+      prehashRemove(oid);
+    }
+    offset += sizeof(unsigned int);
+  }
+  return 0;
+}
--- /dev/null
+#ifndef _ADDUDPENHANCE_H
+#define _ADDUDPENHANCE_H
+
+#include "dstm.h"
+#include "mlookup.h"
+
+
+/*******************************
+ * Udp Message structures
+ ******************************/
+#define INVALIDATE_OBJS 101
+
+/*************************
+ * Global constants
+ ************************/
+#define MAX_SIZE 2000
+
+/********************************
+ * Function Prototypes
+ *******************************/
+int createUdpSocket();
+int udpInit();
+void *udpListenBroadcast(void *);
+int invalidateObj(trans_req_data_t *);
+int invalidateFromPrefetchCache(char *);
+int sendUdpMsg(trans_req_data_t *, struct sockaddr_in *, int);
+#endif
--- /dev/null
+#include "clookup.h"
+
+__thread chashlistnode_t *c_table;
+__thread unsigned int c_size;
+__thread unsigned int c_mask;
+__thread unsigned int c_numelements;
+__thread unsigned int c_threshold;
+__thread double c_loadfactor;
+
+/* Initialise the thread-local hash table (the c_* variables) with `size`
+ * buckets and the given load factor. The bucket array comes from calloc();
+ * its result is stored in c_table without being checked. */
+void t_chashCreate(unsigned int size, double loadfactor) {
+  c_numelements = 0; // Initial number of elements in the hash
+  c_size = size;
+  c_loadfactor = loadfactor;
+  c_mask = (size << 1)-1;
+  c_threshold = size*loadfactor;
+
+  // Allocate space for the hash table
+  c_table = calloc(size, sizeof(chashlistnode_t));
+}
+
+/* Allocate and initialise a hash table with `size` buckets and the given
+ * load factor. Returns the new table, or NULL (after logging) when either
+ * the descriptor or the bucket array cannot be allocated. */
+chashtable_t *chashCreate(unsigned int size, double loadfactor) {
+  chashtable_t *ctable = calloc(1, sizeof(chashtable_t));
+  if(ctable == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return NULL;
+  }
+
+  // Allocate space for the hash table
+  chashlistnode_t *nodes = calloc(size, sizeof(chashlistnode_t));
+  if(nodes == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    free(ctable);
+    return NULL;
+  }
+
+  ctable->numelements = 0; // Initial number of elements in the hash
+  ctable->size = size;
+  ctable->loadfactor = loadfactor;
+  ctable->mask = (size << 1)-1;
+  ctable->threshold = size*loadfactor;
+  ctable->table = nodes;
+  return ctable;
+}
+
+//Finds the right bin in the hash table: masks the key with table->mask, then shifts right by one
+static INLINE unsigned int chashFunction(chashtable_t *table, unsigned int key) {
+ return ( key & (table->mask))>>1; //throw away low order bit
+}
+
+/* Store `val` under `key` in `table`.
+ *
+ * The table is doubled first when its element count already exceeds the
+ * threshold. An empty bucket head (key 0) takes the entry directly;
+ * otherwise a new node is linked in right after the bucket head. Existing
+ * entries with the same key are not looked for.
+ *
+ * If the chain node cannot be allocated the error is logged and the table
+ * is left unchanged. */
+void chashInsert(chashtable_t *table, unsigned int key, void *val) {
+  chashlistnode_t *ptr;
+
+  if(table->numelements > (table->threshold)) {
+    //Resize
+    unsigned int newsize = table->size << 1;
+    chashResize(table,newsize);
+  }
+
+  ptr = &table->table[(key&table->mask)>>1];
+
+  if(ptr->key==0) {
+    ptr->key=key;
+    ptr->val=val;
+  } else { // Insert directly behind the bucket head
+    chashlistnode_t * node = calloc(1, sizeof(chashlistnode_t));
+    if(node == NULL) {
+      printf("Calloc error %s %d\n", __FILE__, __LINE__);
+      return;
+    }
+    node->key = key;
+    node->val = val;
+    node->next = ptr->next;
+    ptr->next=node;
+  }
+  //Count the element only once it is really stored
+  table->numelements++;
+}
+
+// Look up key in table: returns the stored val, or NULL when no node in
+// the key's bucket carries that key
+INLINE void * chashSearch(chashtable_t *table, unsigned int key) {
+  //Bucket index is computed inline instead of calling the hash function
+  chashlistnode_t *cursor;
+
+  for(cursor = &table->table[(key & table->mask)>>1]; cursor != NULL; cursor = cursor->next) {
+    if(cursor->key == key)
+      return cursor->val;
+  }
+  return NULL;
+}
+
+/* Store `val` under `key` in the thread-local table (c_table).
+ *
+ * The table is doubled first when its element count already exceeds
+ * c_threshold. An empty bucket head (key 0) takes the entry directly;
+ * otherwise a new node is linked in right after the bucket head. Existing
+ * entries with the same key are not looked for.
+ *
+ * If the chain node cannot be allocated the error is logged and the table
+ * is left unchanged. */
+void t_chashInsert(unsigned int key, void *val) {
+  chashlistnode_t *ptr;
+
+  if(c_numelements > (c_threshold)) {
+    //Resize
+    unsigned int newsize = c_size << 1;
+    t_chashResize(newsize);
+  }
+
+  ptr = &c_table[(key&c_mask)>>1];
+
+  if(ptr->key==0) {
+    ptr->key=key;
+    ptr->val=val;
+  } else { // Insert directly behind the bucket head
+    chashlistnode_t * node = calloc(1, sizeof(chashlistnode_t));
+    if(node == NULL) {
+      printf("Calloc error %s %d\n", __FILE__, __LINE__);
+      return;
+    }
+    node->key = key;
+    node->val = val;
+    node->next = ptr->next;
+    ptr->next=node;
+  }
+  //Count the element only once it is really stored
+  c_numelements++;
+}
+
+// Look up key in the thread-local table: returns the stored val, or NULL
+// when no node in the key's bucket carries that key
+INLINE void * t_chashSearch(unsigned int key) {
+  //Bucket index is computed inline instead of calling the hash function
+  chashlistnode_t *cursor;
+
+  for(cursor = &c_table[(key & c_mask)>>1]; cursor != NULL; cursor = cursor->next) {
+    if(cursor->key == key)
+      return cursor->val;
+  }
+  return NULL;
+}
+
+unsigned int chashRemove(chashtable_t *table, unsigned int key) { /* Removes key from table via
+ * chashRemove2(). Returns 0 when that call returned a non-NULL value and 1 when
+ * it returned NULL (key absent, or stored with a NULL value). */
+ return chashRemove2(table, key)==NULL;
+
+}
+
+void * chashRemove2(chashtable_t *table, unsigned int key) { /* Removes the first entry with
+ * this key from its bucket and returns the value that was stored with it;
+ * returns NULL when no entry matches. Chain nodes that drop out are freed; the
+ * bucket head lives inside the table array and is overwritten instead. */
+ int index;
+ chashlistnode_t *curr, *prev; /* prev is only read for non-head nodes, after the loop tail has assigned it */
+ chashlistnode_t *ptr, *node;
+ void *value;
+
+ ptr = table->table;
+ index = chashFunction(table,key);
+ curr = &ptr[index];
+
+ for (; curr != NULL; curr = curr->next) {
+ if (curr->key == key) { // Find a match in the hash table
+ table->numelements--; // Decrement the number of elements in the global hashtable
+ if ((curr == &ptr[index]) && (curr->next == NULL)) { // Delete the first item inside the hashtable with no linked list of chashlistnode_t
+ curr->key = 0;
+ value=curr->val;
+ curr->val = NULL;
+ } else if ((curr == &ptr[index]) && (curr->next != NULL)) { //Delete the first item with a linked list of chashlistnode_t connected
+ curr->key = curr->next->key;
+ value=curr->val;
+ curr->val = curr->next->val;
+ node = curr->next;
+ curr->next = curr->next->next;
+ free(node);
+ } else { // Regular delete from linked listed
+ prev->next = curr->next;
+ value=curr->val;
+ free(curr);
+ }
+ return value;
+ }
+ prev = curr;
+ }
+ return NULL;
+}
+
+unsigned int chashResize(chashtable_t *table, unsigned int newsize) { /* Rehashes every entry of
+ * table into a freshly allocated array of newsize buckets and frees the old
+ * array. Returns 0 on success, or 1 (table untouched) when the new array cannot
+ * be allocated. An entry that lands on an empty new bucket head is copied there
+ * (its old chain node, if any, is freed); otherwise the old chain node itself
+ * is relinked behind the new bucket head. */
+ chashlistnode_t *node, *ptr, *curr; // curr and next keep track of the current and the next chashlistnodes in a linked list
+ unsigned int oldsize;
+ int isfirst; // Keeps track of the first element in the chashlistnode_t for each bin in hashtable
+ unsigned int i,index;
+ unsigned int mask;
+
+ ptr = table->table;
+ oldsize = table->size;
+
+ if((node = calloc(newsize, sizeof(chashlistnode_t))) == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+
+ table->table = node; //Update the global hashtable upon resize()
+ table->size = newsize;
+ table->threshold = newsize * table->loadfactor;
+ mask=table->mask = (newsize << 1)-1;
+
+ for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+ curr = &ptr[i];
+ isfirst = 1;
+ do { //Inner loop to go through linked lists
+ unsigned int key;
+ chashlistnode_t *tmp,*next;
+
+ if ((key=curr->key) == 0) { //Exit inner loop if there the first element is 0
+ break; //key = val =0 for element if not present within the hash table
+ }
+ next = curr->next; /* saved first: curr may be freed or relinked below */
+ index = (key & mask) >>1;
+ tmp=&node[index];
+ // Insert into the new table
+ if(tmp->key == 0) {
+ tmp->key = curr->key;
+ tmp->val = curr->val;
+ if (!isfirst) { /* bucket heads belong to the old array, which is freed as a whole at the end */
+ free(curr);
+ }
+ }/*
+ NOTE: Add this case if you change this...
+ This case currently never happens because of the way things rehash....
+ else if (isfirst) {
+ chashlistnode_t *newnode= calloc(1, sizeof(chashlistnode_t));
+ newnode->key = curr->key;
+ newnode->val = curr->val;
+ newnode->next = tmp->next;
+ tmp->next=newnode;
+ } */
+ else {
+ curr->next=tmp->next;
+ tmp->next=curr;
+ }
+
+ isfirst = 0;
+ curr = next;
+ } while(curr!=NULL);
+ }
+
+ free(ptr); //Free the memory of the old hash table
+ return 0;
+}
+
+unsigned int t_chashResize(unsigned int newsize) { /* Rehashes every entry of the thread-local
+ * table (c_table) into a freshly allocated array of newsize buckets and frees
+ * the old array. Returns 0 on success, or 1 (table untouched) when the new
+ * array cannot be allocated. An entry that lands on an empty new bucket head is
+ * copied there (its old chain node, if any, is freed); otherwise the old chain
+ * node itself is relinked behind the new bucket head. */
+ chashlistnode_t *node, *ptr, *curr; // curr and next keep track of the current and the next chashlistnodes in a linked list
+ unsigned int oldsize;
+ int isfirst; // Keeps track of the first element in the chashlistnode_t for each bin in hashtable
+ unsigned int i,index;
+ unsigned int mask;
+
+ ptr = c_table;
+ oldsize = c_size;
+
+ if((node = calloc(newsize, sizeof(chashlistnode_t))) == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+
+ c_table = node; //Update the global hashtable upon resize()
+ c_size = newsize;
+ c_threshold = newsize * c_loadfactor;
+ mask=c_mask = (newsize << 1)-1;
+
+ for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+ curr = &ptr[i];
+ isfirst = 1;
+ do { //Inner loop to go through linked lists
+ unsigned int key;
+ chashlistnode_t *tmp,*next;
+
+ if ((key=curr->key) == 0) { //Exit inner loop if there the first element is 0
+ break; //key = val =0 for element if not present within the hash table
+ }
+ next = curr->next; /* saved first: curr may be freed or relinked below */
+ index = (key & mask) >>1;
+ tmp=&node[index];
+ // Insert into the new table
+ if(tmp->key == 0) {
+ tmp->key = curr->key;
+ tmp->val = curr->val;
+ if (!isfirst) { /* bucket heads belong to the old array, which is freed as a whole at the end */
+ free(curr);
+ }
+ }/*
+ NOTE: Add this case if you change this...
+ This case currently never happens because of the way things rehash....
+ else if (isfirst) {
+ chashlistnode_t *newnode= calloc(1, sizeof(chashlistnode_t));
+ newnode->key = curr->key;
+ newnode->val = curr->val;
+ newnode->next = tmp->next;
+ tmp->next=newnode;
+ } */
+ else {
+ curr->next=tmp->next;
+ tmp->next=curr;
+ }
+
+ isfirst = 0;
+ curr = next;
+ } while(curr!=NULL);
+ }
+
+ free(ptr); //Free the memory of the old hash table
+ return 0;
+}
+
+//Free the whole table: every chain node, the bucket array, then the
+//table descriptor itself. Stored values are not freed.
+void chashDelete(chashtable_t *ctable) {
+  chashlistnode_t *buckets = ctable->table;
+  int b;
+
+  for(b=0 ; b<ctable->size ; b++) {
+    chashlistnode_t *node, *following;
+    for(node = buckets[b].next; node != NULL; node = following) {
+      following = node->next;
+      free(node);
+    }
+  }
+  free(buckets);
+  free(ctable);
+}
+
+//Free the thread-local table: every chain node and the bucket array,
+//then reset c_table to NULL. Stored values are not freed.
+void t_chashDelete() {
+  chashlistnode_t *buckets = c_table;
+  int b;
+
+  for(b=0 ; b<c_size ; b++) {
+    chashlistnode_t *node, *following;
+    for(node = buckets[b].next; node != NULL; node = following) {
+      following = node->next;
+      free(node);
+    }
+  }
+  free(buckets);
+  c_table=NULL;
+}
--- /dev/null
+#ifndef _CLOOKUP_H_
+#define _CLOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define CLOADFACTOR 0.25
+#define CHASH_SIZE 1024
+
+#define INLINE inline __attribute__((always_inline))
+
+
+typedef struct chashlistnode {
+ unsigned int key; /* 0 marks an unused bucket head */
+ void *val; //this can be cast to another type or used to point to a larger structure
+ struct chashlistnode *next; /* next node in this bucket's chain, or NULL */
+} chashlistnode_t;
+
+typedef struct chashtable {
+ chashlistnode_t *table; // points to beginning of hash table
+ unsigned int size; /* number of buckets in table */
+ unsigned int mask; /* (size << 1) - 1; a key's bucket index is (key & mask) >> 1 */
+ unsigned int numelements; /* entries currently stored */
+ unsigned int threshold; /* size * loadfactor; an insert resizes once numelements exceeds it */
+ double loadfactor;
+} chashtable_t;
+
+
+void t_chashCreate(unsigned int size, double loadfactor);
+void t_chashInsert(unsigned int key, void *val);
+void * t_chashSearch(unsigned int key);
+unsigned int t_chashResize(unsigned int newsize);
+void t_chashDelete();
+
+/* Prototypes for hash*/
+chashtable_t *chashCreate(unsigned int size, double loadfactor);
+static unsigned int chashFunction(chashtable_t *table, unsigned int key);
+void chashInsert(chashtable_t *table, unsigned int key, void *val);
+void *chashSearch(chashtable_t *table, unsigned int key); //returns val, NULL if not found
+unsigned int chashRemove(chashtable_t *table, unsigned int key); //returns 0 if a non-NULL val was removed, 1 otherwise
+void * chashRemove2(chashtable_t *table, unsigned int key); //returns the removed val, NULL if not found
+unsigned int chashResize(chashtable_t *table, unsigned int newsize);
+void chashDelete(chashtable_t *table);
+/* end hash */
+
+extern __thread chashlistnode_t *c_table;
+extern __thread unsigned int c_size;
+extern __thread unsigned int c_mask;
+extern __thread unsigned int c_numelements;
+extern __thread unsigned int c_threshold;
+extern __thread double c_loadfactor;
+
+#endif
--- /dev/null
+#include "clookup.h"
+#define INLINE inline __attribute__((always_inline))
+
+/* Allocate and initialise an open-addressing hash table with `size` slots
+ * and the given load factor. Returns the new table, or NULL (after logging)
+ * when either the descriptor or the slot array cannot be allocated. */
+chashtable_t *chashCreate(unsigned int size, float loadfactor) {
+  chashtable_t *ctable = calloc(1, sizeof(chashtable_t));
+  if(ctable == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return NULL;
+  }
+
+  // Allocate space for the hash table
+  struct chashentry *nodes = calloc(size, sizeof(struct chashentry));
+  if(nodes == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    free(ctable);
+    return NULL;
+  }
+
+  ctable->numelements = 0; // Initial number of elements in the hash
+  ctable->size = size;
+  ctable->mask = (size << 1)-1;
+  ctable->loadfactor = loadfactor;
+  ctable->capacity=ctable->loadfactor*ctable->size;
+  ctable->table = nodes;
+  return ctable;
+}
+
+//Finds the right bin in the hash table for probe number i (i == 0 gives the key's home bin)
+static INLINE unsigned int chashFunction(chashtable_t *table, unsigned int key, unsigned int i) {
+ return ((key+i*331) & table->mask)>>1; //throw away low order bit
+}
+
+//Store objects and their pointers into hash
+void chashInsert(chashtable_t *table, unsigned int key, void *val) { /* Open addressing: the home
+ * slot (key & mask) >> 1 is tried first, then slots ((key + i*331) & mask) >> 1
+ * for i = 1, 2, ... until an empty one (key == 0) is found. numelements is
+ * incremented up front; the table is doubled only when the home slot is taken
+ * and the pre-increment count exceeds capacity.
+ * NOTE(review): the probe loop has no exit other than finding an empty slot. */
+ struct chashentry *node = &table->table[(key & table->mask)>>1];
+ unsigned int ne=table->numelements++;
+ unsigned int i;
+
+ if (node->key==0) {
+ node->ptr=val;
+ node->key=key;
+ return;
+ }
+
+ if(ne > table->capacity) {
+ //Resize
+ unsigned int newsize = table->size << 1;
+ chashResize(table,newsize);
+ node = &table->table[(key & table->mask)>>1]; /* home slot must be recomputed against the new mask */
+ if (node->key==0) {
+ node->ptr=val;
+ node->key=key;
+ return;
+ }
+ }
+
+
+ for(i=1;1;i++) {
+ node = &table->table[((key+i*331) & table->mask)>>1];
+ if (node->key==0) {
+ node->ptr=val;
+ node->key=key;
+ return;
+ }
+ }
+}
+
+// Look up key by probing: the home slot first, then the slots reached by
+// adding multiples of 331 to the key. Returns the stored pointer on a key
+// match, or NULL as soon as an empty slot (key 0) is reached.
+INLINE void * chashSearch(chashtable_t *table, unsigned int key) {
+  //Slot index is computed inline instead of calling the hash function
+  unsigned int probe;
+
+  for(probe=0;1;probe++) {
+    struct chashentry *slot = &table->table[((key+probe*331) & table->mask)>>1];
+    unsigned int found = slot->key;
+    if (found==key)
+      return slot->ptr;
+    if (found==0)
+      return NULL;
+  }
+}
+
+void chashResize(chashtable_t *table, unsigned int newsize) { /* Re-inserts every occupied slot
+ * of table into a freshly allocated array of newsize slots, using the same
+ * probe sequence as chashInsert(), then frees the old array. When the new array
+ * cannot be allocated the error is logged and table is left unchanged. */
+ unsigned int oldsize=table->size;
+ struct chashentry *ptr= table->table;
+ struct chashentry *node= calloc(newsize, sizeof(struct chashentry));
+ unsigned int mask;
+ unsigned int i;
+ struct chashentry *newnode;
+ unsigned int bin;
+ unsigned int key;
+ struct chashentry *curr;
+
+ if(node == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ return;
+ }
+ table->table = node; //Update the global hashtable upon resize()
+ table->size = newsize;
+ table->capacity=table->loadfactor*table->size;
+ mask=(table->mask = (newsize << 1)-1);
+
+ for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+ curr=&ptr[i];
+ key=curr->key;
+ if (key != 0) {
+ newnode= &table->table[(key&mask)>>1];
+ if (newnode->key==0) {
+ newnode->key=key;
+ newnode->ptr=curr->ptr;
+ continue;
+ }
+
+ for(bin=1;1;bin++) { /* home slot taken: probe until an empty slot turns up */
+ newnode = &table->table[((key+bin*331) & mask)>>1];
+ if (newnode->key==0) {
+ newnode->key=key;
+ newnode->ptr=curr->ptr;
+ break;
+ }
+ }
+ }
+ }
+ free(ptr); //Free the memory of the old hash table
+}
+
+//Delete the entire hash table
+void chashDelete(chashtable_t *ctable) { /* frees the slot array and then the descriptor; stored pointers are not freed */
+ free(ctable->table);
+ free(ctable);
+}
--- /dev/null
+#ifndef _CLOOKUP_H_
+#define _CLOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define CLOADFACTOR 0.25
+#define CHASH_SIZE 1024
+
+ // One slot of the open-addressed table.
+ struct chashentry {
+ void * ptr; // value stored for this key
+ unsigned int key; // key of the entry; 0 marks an empty slot
+ };
+
+ // Hash table header. chashResize() keeps size, mask and capacity consistent:
+ // mask = (size << 1) - 1 and capacity = loadfactor * size.
+ typedef struct chashtable {
+ struct chashentry *table; // slot array with `size` entries
+ unsigned int size; // number of slots in `table`
+ unsigned int mask; // applied to the key before the >>1 that yields the slot index
+ unsigned int numelements; // presumably the number of stored entries -- TODO confirm in chashInsert
+ unsigned int capacity; // loadfactor * size
+ float loadfactor; // fraction of `size` used to derive `capacity`
+ } chashtable_t;
+
+ /* Prototypes for hash*/
+ chashtable_t *chashCreate(unsigned int size, float loadfactor);
+ // NOTE(review): a static prototype in a header is private to each including
+ // file; includers that do not define chashFunction may get a
+ // "declared but never defined" warning -- confirm it is still needed.
+ static unsigned int chashFunction(chashtable_t *table, unsigned int key, unsigned int i);
+ // NOTE(review): declared void, but dht.c compares the result of chashInsert()
+ // with 0 -- confirm the intended return type.
+ void chashInsert(chashtable_t *table, unsigned int key, void *val);
+ void *chashSearch(chashtable_t *table, unsigned int key); //returns val, NULL if not found
+ // Rebuilds the table with newsize slots and rehashes all entries.
+ void chashResize(chashtable_t *table, unsigned int newsize);
+ // Frees the slot array and the table header.
+ void chashDelete(chashtable_t *table);
+ /* end hash */
+
+#endif
+
--- /dev/null
+#ifndef DELAYCOMP_H
+#define DELAYCOMP_H
+
+//There is probably a better way for these...but we'll just hardcode
+//them for now..probably a real implementation would page protect the
+//page after...then default to something simpler
+
+ // Fixed capacities (in elements) of the per-thread stacks below. The
+ // expansions are parenthesized so they stay a single operand when used inside
+ // a larger expression such as `n % MAXPOINTERS` or `x / MAXVALUES`.
+ #define MAXPOINTERS (1024*1024*1)
+ #define MAXVALUES (1024*1024*2)
+ #define MAXBRANCHES (1024*1024*4)
+
+ // Fixed-size stack of pointers. The STOREPTR*/RESTOREPTR macros below use
+ // `count` as the index of the next slot to write or read.
+ struct pointerlist {
+ int count; // index of the next array slot used by the macros
+ void * prev; // not used by the macros in this header -- purpose TODO confirm
+ void * array[MAXPOINTERS];
+ };
+
+ // Fixed-size stack of int-sized slots used by the STORE*/RESTORE* macros for
+ // primitive values; doubles and long longs take two consecutive slots.
+ struct primitivelist {
+ int count; // index of the next array slot used by the macros
+ int array[MAXVALUES];
+ };
+
+ // Fixed-size stack of branch outcomes, one char per recorded branch, used by
+ // STOREANDBRANCH / RESTOREANDBRANCH.
+ struct branchlist {
+ int count; // index of the next array slot used by the macros
+ char array[MAXBRANCHES];
+ };
+
+ // Thread-local stacks (one instance per thread via __thread); defined
+ // elsewhere.
+ extern __thread struct pointerlist ptrstack;
+ extern __thread struct primitivelist primstack;
+ extern __thread struct branchlist branchstack;
+
+ // Store/restore macros. Every macro uses the slot at `count` of the relevant
+ // stack and advances `count`; none of them checks the stack bounds.
+ // NOTE(review): several expansions end in ';', are bare { } blocks, or consist
+ // of two statements, so they are not safe as the body of an unbraced
+ // if/else; macro arguments are not parenthesized.
+ //Pointers
+
+ // Reads the next stored pointer into x (expansion already ends in ';').
+ #define RESTOREPTR(x) x=ptrstack.array[ptrstack.count++];
+
+ // Stores COMPOID(x) and also passes it to dc_t_chashInsertOnce(); both names
+ // are defined elsewhere.
+ #define STOREPTR(x) {void * y=COMPOID(x); ptrstack.array[ptrstack.count++]=y; dc_t_chashInsertOnce(y,y);}
+
+ // As STOREPTR but without the dc_t_chashInsertOnce() call.
+ #define STOREPTRNOLOCK(x) {void * y=COMPOID(x); ptrstack.array[ptrstack.count++]=y; }
+
+ // As STOREPTR but stores x itself instead of COMPOID(x).
+ #define STOREPTRNOTRANS(x) {void * y=x; ptrstack.array[ptrstack.count++]=y; dc_t_chashInsertOnce(y,y);}
+
+ // Stores x itself, without the dc_t_chashInsertOnce() call.
+ #define STOREPTRNOLOCKNOTRANS(x) {void * y=x; ptrstack.array[ptrstack.count++]=y; }
+
+ //Branches
+
+ // Jumps to loc when the next recorded branch outcome is nonzero.
+ #define RESTOREANDBRANCH(loc) if (branchstack.array[branchstack.count++]) goto loc
+
+ // Records cond (assigned into a char slot, intentionally '=' not '==') and
+ // jumps to loc when the stored value is nonzero.
+ #define STOREANDBRANCH(cond, loc) if (branchstack.array[branchstack.count++]=cond) goto loc
+
+ //Integers
+
+ #define RESTOREI(x) x=primstack.array[primstack.count++]
+
+ #define STOREI(x) primstack.array[primstack.count++]=x
+
+ //Floats
+
+ // The int slot is accessed through a float pointer.
+ #define RESTOREF(x) x=*((float *)&primstack.array[primstack.count++])
+
+ #define STOREF(x) *((float *)&primstack.array[primstack.count++])=x
+
+ //Doubles
+
+ // Doubles occupy two int slots; the expansion is two statements.
+ #define RESTORED(x) x=*((double *)&primstack.array[primstack.count]); primstack.count+=2
+
+ #define STORED(x) *((double *)&primstack.array[primstack.count])=x; primstack.count+=2
+
+ //Bytes
+
+ // Only the first byte of the int slot is read or written.
+ #define RESTOREB(x) x=*((char *)&primstack.array[primstack.count++])
+
+ #define STOREB(x) *((char *)&primstack.array[primstack.count++])=x
+
+ //Characters
+
+ // Characters are stored as a short in the first two bytes of the int slot.
+ #define RESTOREC(x) x=*((short *)&primstack.array[primstack.count++])
+
+ #define STOREC(x) *((short *)&primstack.array[primstack.count++])=x
+
+ //Longs
+
+ // Long longs occupy two int slots; the expansion is two statements.
+ #define RESTOREJ(x) x=*((long long *)&primstack.array[primstack.count]); primstack.count+=2
+
+ #define STOREJ(x) *((long long *)&primstack.array[primstack.count])=x; primstack.count+=2
+
+ //Booleans
+
+ #define RESTOREZ(x) x=primstack.array[primstack.count++]
+
+ #define STOREZ(x) primstack.array[primstack.count++]=x
+
+#endif
--- /dev/null
+/*******************************************************************************
+* dht.c
+*
+* High-performance Distributed Hash Table for finding the location of objects
+* in a Distributed Shared Transactional Memory system.
+*
+* Creator: Erik Rubow
+*
+* TODO:
+* 1) Instead of having dhtInsertMult, dhtSearchMult, etc. call their single-key
+* counterparts repeatedly, define some new messages to handle it more
+* efficiently.
+* 2) Improve the efficiency of functions that work with hostArray, hostReplied,
+* and blockOwnerArray.
+* 3) Currently a join or leave causes a rebuild of the entire hash table.
+* Implement more graceful join and leave procedures.
+* 4) Fine tune timeout values for performance, possibly implement a backoff
+* algorithm to prevent overloading the network.
+* 5) Whatever else I'm forgetting
+*
+*******************************************************************************/
+/*******************************************************************************
+* Includes
+*******************************************************************************/
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <sys/poll.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <sys/queue.h>
+#include "dht.h"
+#include "clookup.h" //this works for now, do we need anything better?
+#include "mlookup.h"
+
+/*******************************************************************************
+* Local Defines, Structs
+*******************************************************************************/
+
+#define MAX_MSG_SIZE 1500
+#define UDP_PORT 2157
+#define INIT_HOST_ALLOC 3
+#define INIT_NUM_BLOCKS 16
+#define DEFAULT_INTERFACE "eth0"
+#define TIMEOUT_PERIOD 100
+#define INSERT_TIMEOUT_MS 500
+#define INSERT_RETRIES 50
+#define REMOVE_TIMEOUT_MS 500
+#define REMOVE_RETRIES 50
+#define SEARCH_TIMEOUT_MS 500
+#define SEARCH_RETRIES 50
+
+ //message types
+ //make sure this matches msg_types global var
+ // The value is sent as the first byte of every message (msg[0]).
+ // NUM_MSG_TYPES is the number of types and sizes the msg_types name table.
+ enum
+ {
+ INSERT_CMD,
+ INSERT_RES,
+ REMOVE_CMD,
+ REMOVE_RES,
+ SEARCH_CMD,
+ SEARCH_RES,
+ WHO_IS_LEADER_CMD,
+ WHO_IS_LEADER_RES,
+ JOIN_REQ,
+ JOIN_RES,
+ LEAVE_REQ,
+ LEAVE_RES,
+ DHT_UPDATE_CMD,
+ DHT_UPDATE_RES,
+ ELECT_LEADER_CMD,
+ ELECT_LEADER_RES,
+ CONGRATS_CMD,
+ REBUILD_REQ,
+ REBUILD_CMD,
+ FILL_DHT_CMD,
+ FILL_DHT_RES,
+ RESUME_NORMAL_CMD,
+ RESUME_NORMAL_RES,
+ NUM_MSG_TYPES
+ };
+
+ //states
+ //make sure this matches state_names, timeout_vals, and retry_vals global vars
+ // The value is used as the index into those three tables; NUM_STATES is the
+ // number of states and the size of each table.
+ enum
+ {
+ INIT1_STATE,
+ INIT2_STATE,
+ NORMAL_STATE,
+ LEAD_NORMAL1_STATE,
+ LEAD_NORMAL2_STATE,
+ ELECT1_STATE,
+ ELECT2_STATE,
+ REBUILD0_STATE,
+ REBUILD1_STATE,
+ REBUILD2_STATE,
+ REBUILD3_STATE,
+ REBUILD4_STATE,
+ REBUILD5_STATE,
+ LEAD_REBUILD1_STATE,
+ LEAD_REBUILD2_STATE,
+ LEAD_REBUILD3_STATE,
+ LEAD_REBUILD4_STATE,
+ EXIT1_STATE,
+ EXIT2_STATE,
+ NUM_STATES
+ };
+
+ //status codes
+ // Carried in the second byte of INSERT_RES / REMOVE_RES / SEARCH_RES replies
+ // and returned by dhtInsert(), dhtRemove() and dhtSearch().
+ enum
+ {
+ OPERATION_OK, // request carried out (value 0)
+ KEY_NOT_FOUND, // remove/search: key not present on the owner
+ NOT_KEY_OWNER, // receiving host does not own the key's block
+ NOT_LEADER,
+ INTERNAL_ERROR // owner failed to store the entry
+ };
+
+ // Description of one participating host, as kept in hostArray.
+ struct hostData {
+ unsigned int ipAddr; // IPv4 address in host byte order (htonl() is applied before network use)
+ unsigned int maxKeyCapacity; // capacity announced by the host (sent in JOIN_REQ); 0 when not yet known
+ };
+
+/*******************************************************************************
+* Local Function Prototypes
+*******************************************************************************/
+
+ // Prototypes for the file-local helpers. findHost() is declared here because
+ // checkReplied() and removeHost() call it before its definition.
+ int msgSizeOk(unsigned char *msg, unsigned int size);
+ unsigned short read2(unsigned char *msg);
+ unsigned int read4(unsigned char *msg);
+ void write2(unsigned char *ptr, unsigned short tmp);
+ void write4(unsigned char *ptr, unsigned int tmp);
+ unsigned int getMyIpAddr(const char *interfaceStr);
+ int udpSend(unsigned char *msg, unsigned int size, unsigned int destIp);
+ int udpSendAll(unsigned char *msg, unsigned int size);
+ unsigned int hash(unsigned int x);
+ unsigned int getKeyOwner(unsigned int key);
+ void setState(unsigned int newState);
+ void makeAssignments();
+ int addHost(struct hostData newHost);
+ int findHost(unsigned int ipAddr);
+ int removeHost(unsigned int ipAddr);
+ void removeUnresponsiveHosts();
+ int checkReplied(unsigned int ipAddr);
+ int allReplied();
+ void writeHostList();
+ void dhtLog(const char *format, ...);
+ void *fillTask();
+ void *udpListen();
+
+/*******************************************************************************
+* Global Variables
+*******************************************************************************/
+
+ //make sure this matches enumeration above
+ // Printable name of each message type, indexed by the message-type enum
+ // value; used only for logging.
+ const char *msg_types[NUM_MSG_TYPES] =
+ {
+ "INSERT_CMD",
+ "INSERT_RES",
+ "REMOVE_CMD",
+ "REMOVE_RES",
+ "SEARCH_CMD",
+ "SEARCH_RES",
+ "WHO_IS_LEADER_CMD",
+ "WHO_IS_LEADER_RES",
+ "JOIN_REQ",
+ "JOIN_RES",
+ "LEAVE_REQ",
+ "LEAVE_RES",
+ "DHT_UPDATE_CMD",
+ "DHT_UPDATE_RES",
+ "ELECT_LEADER_CMD",
+ "ELECT_LEADER_RES",
+ "CONGRATS_CMD",
+ "REBUILD_REQ",
+ "REBUILD_CMD",
+ "FILL_DHT_CMD",
+ "FILL_DHT_RES",
+ "RESUME_NORMAL_CMD",
+ "RESUME_NORMAL_RES"
+ };
+
+ // Printable name of each state, indexed by the state enum value; must stay in
+ // the same order as that enum. Used by setState() for logging.
+ const char *state_names[NUM_STATES] =
+ {
+ "INIT1_STATE",
+ "INIT2_STATE",
+ "NORMAL_STATE",
+ "LEAD_NORMAL1_STATE",
+ "LEAD_NORMAL2_STATE",
+ "ELECT1_STATE",
+ "ELECT2_STATE",
+ "REBUILD0_STATE",
+ "REBUILD1_STATE",
+ "REBUILD2_STATE",
+ "REBUILD3_STATE",
+ "REBUILD4_STATE",
+ "REBUILD5_STATE",
+ "LEAD_REBUILD1_STATE",
+ "LEAD_REBUILD2_STATE",
+ "LEAD_REBUILD3_STATE",
+ "LEAD_REBUILD4_STATE",
+ "EXIT1_STATE",
+ "EXIT2_STATE",
+ };
+
+ //note: { 0, 0 } means no timeout
+ // Per-state timeout as { seconds, microseconds }, indexed by the state enum
+ // value. setState() adds the entry to the current time to arm `timer`, or
+ // clears `timerSet` when the entry is { 0, 0 }.
+ struct timeval timeout_vals[NUM_STATES] ={
+ { 0, 500000 }, //INIT1_STATE
+ { 0, 500000 }, //INIT2_STATE
+ { 0, 0 }, //NORMAL_STATE
+ { 0, 0 }, //LEAD_NORMAL1_STATE
+ { 3, 0 }, //LEAD_NORMAL2_STATE
+ { 1, 0 }, //ELECT1_STATE
+ { 1, 0 }, //ELECT2_STATE
+ { 0, 500000 }, //REBUILD0_STATE
+ { 0, 500000 }, //REBUILD1_STATE
+ { 10, 0 }, //REBUILD2_STATE
+ { 10, 0 }, //REBUILD3_STATE
+ { 10, 0 }, //REBUILD4_STATE
+ { 1, 0 }, //REBUILD5_STATE
+ { 1, 0 }, //LEAD_REBUILD1_STATE
+ { 1, 0 }, //LEAD_REBUILD2_STATE
+ { 10, 0 }, //LEAD_REBUILD3_STATE
+ { 10, 0 }, //LEAD_REBUILD4_STATE
+ { 0, 500000 }, //EXIT1_STATE
+ { 0, 0 } //EXIT2_STATE
+ };
+
+ // Per-state retry count, indexed by the state enum value.
+ // NOTE(review): the consumer is not in this part of the file; presumably the
+ // number of timeouts tolerated in a state before giving up -- confirm
+ // against the timeout handling in udpListen().
+ int retry_vals[NUM_STATES] =
+ {
+ 100, //INIT1_STATE
+ 10, //INIT2_STATE
+ 0, //NORMAL_STATE
+ 0, //LEAD_NORMAL1_STATE
+ 0, //LEAD_NORMAL2_STATE
+ 10, //ELECT1_STATE
+ 10, //ELECT2_STATE
+ 10, //REBUILD0_STATE
+ 10, //REBUILD1_STATE
+ 0, //REBUILD2_STATE
+ 0, //REBUILD3_STATE
+ 0, //REBUILD4_STATE
+ 10, //REBUILD5_STATE
+ 10, //LEAD_REBUILD1_STATE
+ 10, //LEAD_REBUILD2_STATE
+ 10, //LEAD_REBUILD3_STATE
+ 10, //LEAD_REBUILD4_STATE
+ 10, //EXIT1_STATE
+ 0 //EXIT2_STATE
+ };
+
+ // Module state shared between the interface functions and the listener
+ // thread. The interface functions and udpListen() take stateMutex around
+ // their accesses to `state` and the host/block tables.
+ FILE *logfile; // destination of dhtLog() and writeHostList()
+ struct hostData myHostData; // this host's address and capacity
+ pthread_t threadUdpListen;
+ pthread_t threadFillTask;
+ //status of fillTask: 0 = ready to run, 1 = running, 2 = completed, 3 = error
+ int fillStatus;
+ struct pollfd udpPollSock; // UDP socket bound to UDP_PORT, polled by udpListen()
+ unsigned int state; // current state (state enum value)
+ unsigned int seed; // address of the seed host given to dhtInit(); 0 = none
+ unsigned int leader; // address of the leader; 0 = not known
+ unsigned int electionOriginator;
+ unsigned int electionParent;
+ unsigned int hostArraySize = 0; // allocated entries in hostArray / hostReplied
+ struct hostData *hostArray = NULL; // known hosts; the first numHosts entries are valid
+ unsigned int numBlocks = 0; // entries in blockOwnerArray
+ unsigned short *blockOwnerArray = NULL; // block index -> index into hostArray
+ unsigned char *hostReplied = NULL; // per-host reply flag, parallel to hostArray
+ pthread_mutex_t stateMutex;
+ pthread_cond_t stateCond; // waited on by the dht* calls until the state permits them
+ chashtable_t *myHashTable; // local key -> value store
+ unsigned int numHosts; // valid entries in hostArray
+ struct timeval timer; // expiry time armed by setState()
+ int timerSet; // nonzero when `timer` is armed
+ int timeoutCntr; // reset to 0 by setState()
+
+/*******************************************************************************
+* Interface Function Definitions
+*******************************************************************************/
+
+ /*
+  * Initialize this DHT node: open the per-host log file, record this host's
+  * address and capacity, allocate the host/block tables, bind the UDP socket,
+  * choose the initial state and start the listener thread.
+  *
+  * seedIpAddr     - host-byte-order IPv4 address of an existing node, or 0 to
+  *                  start as the leader
+  * maxKeyCapacity - stored in myHostData and later sent with JOIN_REQ
+  */
+ void dhtInit(unsigned int seedIpAddr, unsigned int maxKeyCapacity) {
+   struct in_addr tmpAddr;
+   // "dht-" + dotted quad + ".log"; INET_ADDRSTRLEN already counts the NUL.
+   // The previous 23-byte buffer overflowed for 15-character addresses.
+   char filename[sizeof("dht-") - 1 + INET_ADDRSTRLEN + sizeof(".log") - 1];
+   struct sockaddr_in myAddr;
+   socklen_t socklen = sizeof(struct sockaddr_in);
+   unsigned char initMsg;
+
+   // Query the interface once and reuse the result for the log file name.
+   myHostData.ipAddr = getMyIpAddr(DEFAULT_INTERFACE);
+   myHostData.maxKeyCapacity = maxKeyCapacity;
+
+   tmpAddr.s_addr = htonl(myHostData.ipAddr);
+   snprintf(filename, sizeof(filename), "dht-%s.log", inet_ntoa(tmpAddr));
+   printf("log file: %s\n", filename);
+
+   logfile = fopen(filename, "w");
+   if (logfile == NULL) {
+     // dhtLog() and writeHostList() write to logfile unconditionally, so fall
+     // back to stderr instead of leaving a NULL stream behind.
+     perror("dhtInit():fopen()");
+     logfile = stderr;
+   }
+   dhtLog("dhtInit(): inializing...\n");
+
+   seed = seedIpAddr;
+   leader = 0;
+   electionOriginator = 0;
+   electionParent = 0;
+   hostArraySize = INIT_HOST_ALLOC;
+   hostArray = calloc(hostArraySize, sizeof(struct hostData));
+   hostReplied = calloc(hostArraySize, sizeof(unsigned char));
+   hostArray[0] = myHostData; // this host is always the first known host
+   numHosts = 1;
+   numBlocks = INIT_NUM_BLOCKS;
+   blockOwnerArray = calloc(numBlocks, sizeof(unsigned short));
+   pthread_mutex_init(&stateMutex, NULL);
+   pthread_cond_init(&stateCond, NULL);
+   myHashTable = chashCreate(HASH_SIZE, LOADFACTOR);
+
+   udpPollSock.fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+   if (udpPollSock.fd < 0)
+     perror("dhtInit():socket()");
+
+   udpPollSock.events = POLLIN;
+
+   bzero(&myAddr, socklen);
+   myAddr.sin_family = AF_INET;
+   myAddr.sin_addr.s_addr = htonl(INADDR_ANY);
+   myAddr.sin_port = htons(UDP_PORT);
+
+   if (bind(udpPollSock.fd, (struct sockaddr *)&myAddr, socklen) < 0)
+     perror("dhtInit():bind()");
+
+   if (seed == 0) {
+     // No seed: this host starts the system and leads it.
+     dhtLog("I am the leader\n");
+     leader = myHostData.ipAddr;
+     setState(LEAD_NORMAL1_STATE);
+   } else {
+     // Ask the seed who the leader is; udpListen() continues from INIT1_STATE.
+     initMsg = WHO_IS_LEADER_CMD;
+     udpSend(&initMsg, 1, seed);
+     setState(INIT1_STATE);
+   }
+
+   if (pthread_create(&threadUdpListen, NULL, udpListen, NULL) != 0)
+     dhtLog("dhtInit() - ERROR creating threadUdpListen\n");
+
+   return;
+ }
+
+ /*
+  * Leave the DHT and release module resources: notifies the leader with
+  * LEAVE_REQ, stops the listener thread, closes the UDP socket, frees the
+  * host/block tables and closes the log file.
+  */
+ void dhtExit() { //TODO: do this gracefully, wait for response from leader, etc.
+   unsigned char msg = LEAVE_REQ;
+
+   udpSend(&msg, 1, leader);
+   dhtLog("dhtExit(): cleaning up...\n");
+   pthread_cancel(threadUdpListen);
+   // Wait until the listener has really terminated: it uses the socket, the
+   // arrays and the log file that are released below.
+   pthread_join(threadUdpListen, NULL);
+   close(udpPollSock.fd);
+   free(hostArray);
+   free(hostReplied);
+   free(blockOwnerArray);
+   fclose(logfile);
+
+   return;
+ }
+
+ /*
+  * Insert (key, val) into the DHT, blocking until the owning host confirms.
+  *
+  * Waits for a state in which inserts are allowed, sends INSERT_CMD to the
+  * host that currently owns the key and retries up to INSERT_RETRIES times,
+  * waiting INSERT_TIMEOUT_MS for each reply. If no OPERATION_OK reply is
+  * obtained, a rebuild is requested from the leader and the sequence repeats.
+  *
+  * Returns OPERATION_OK on success, or -1 if a socket cannot be created.
+  */
+ int dhtInsert(unsigned int key, unsigned int val) {
+   struct sockaddr_in toAddr;
+   struct sockaddr_in fromAddr;
+   socklen_t socklen = sizeof(struct sockaddr_in);
+   socklen_t fromLen;
+   struct pollfd pollsock;
+   unsigned char inBuffer[2];
+   unsigned char outBuffer[9];
+   unsigned char rebuildMsg = REBUILD_REQ;
+   ssize_t bytesRcvd;
+   int i;
+   int retval;
+   int status = -1;
+
+   bzero((char *)&toAddr, socklen);
+   toAddr.sin_family = AF_INET;
+   toAddr.sin_port = htons(UDP_PORT);
+
+   while (status != OPERATION_OK) {
+     pthread_mutex_lock(&stateMutex);
+     while (!(state == NORMAL_STATE || state == LEAD_NORMAL1_STATE
+              || state == LEAD_NORMAL2_STATE || state == REBUILD4_STATE
+              || state == LEAD_REBUILD3_STATE))
+       pthread_cond_wait(&stateCond, &stateMutex);
+     toAddr.sin_addr.s_addr = htonl(getKeyOwner(key));
+     pthread_mutex_unlock(&stateMutex);
+
+     if ((pollsock.fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+       perror("dhtInsert():socket()");
+       return -1;
+     }
+     pollsock.events = POLLIN;
+
+     outBuffer[0] = INSERT_CMD;
+     write4(&outBuffer[1], key);
+     write4(&outBuffer[5], val);
+
+     for (i = 0; i < INSERT_RETRIES; i++) {
+       if (sendto(pollsock.fd, outBuffer, 9, 0, (struct sockaddr *)&toAddr,
+                  socklen) < 0) {
+         perror("dhtInsert():sendto()");
+         break;
+       }
+       retval = poll(&pollsock, 1, INSERT_TIMEOUT_MS);
+       if (retval < 0) {
+         perror("dhtInsert():poll()");
+         break;
+       }
+       if (retval > 0) {
+         fromLen = sizeof(fromAddr); // value-result argument, reset each time
+         bytesRcvd = recvfrom(pollsock.fd, inBuffer, sizeof(inBuffer), 0,
+                              (struct sockaddr *)&fromAddr, &fromLen);
+         // Check the length first so fromAddr is only inspected after a
+         // successful receive.
+         if (bytesRcvd == 2 && inBuffer[0] == INSERT_RES
+             && fromAddr.sin_addr.s_addr == toAddr.sin_addr.s_addr
+             && fromAddr.sin_port == toAddr.sin_port) {
+           status = inBuffer[1]; //status from remote host
+           break;
+         }
+       }
+     }
+
+     // A socket is opened for every round, so release it before the next
+     // round (or before returning) instead of leaking one per retry.
+     close(pollsock.fd);
+
+     if (status != OPERATION_OK) {
+       pthread_mutex_lock(&stateMutex);
+       setState(REBUILD0_STATE);
+       udpSend(&rebuildMsg, 1, leader);
+       pthread_mutex_unlock(&stateMutex);
+     }
+   }
+
+   return status;
+ }
+
+ // Insert numKeys (key, value) pairs one at a time through dhtInsert().
+ // Every pair is attempted; returns 0 when all inserts returned 0 and -1 when
+ // at least one did not.
+ int dhtInsertMult(unsigned int numKeys, unsigned int *keys, unsigned int *vals) {
+   unsigned int k;
+   int anyFailed = 0;
+
+   for (k = 0; k < numKeys; k++) {
+     int rc = dhtInsert(keys[k], vals[k]);
+     if (rc != 0)
+       anyFailed = 1;
+   }
+
+   return anyFailed ? -1 : 0;
+ }
+
+ /*
+  * Remove `key` from the DHT, blocking until the owning host answers.
+  *
+  * Waits for a normal state, sends REMOVE_CMD to the host that currently owns
+  * the key and retries up to REMOVE_RETRIES times, waiting REMOVE_TIMEOUT_MS
+  * for each reply. If neither OPERATION_OK nor KEY_NOT_FOUND is obtained, a
+  * rebuild is requested from the leader and the sequence repeats.
+  *
+  * Returns OPERATION_OK or KEY_NOT_FOUND, or -1 if a socket cannot be created.
+  */
+ int dhtRemove(unsigned int key) {
+   struct sockaddr_in toAddr;
+   struct sockaddr_in fromAddr;
+   socklen_t socklen = sizeof(struct sockaddr_in);
+   socklen_t fromLen;
+   struct pollfd pollsock;
+   unsigned char inBuffer[2];
+   unsigned char outBuffer[5];
+   unsigned char rebuildMsg = REBUILD_REQ;
+   ssize_t bytesRcvd;
+   int i;
+   int retval;
+   int status = -1;
+
+   bzero((char *)&toAddr, socklen);
+   toAddr.sin_family = AF_INET;
+   toAddr.sin_port = htons(UDP_PORT);
+
+   while (!(status == OPERATION_OK || status == KEY_NOT_FOUND)) {
+     pthread_mutex_lock(&stateMutex);
+     while (!(state == NORMAL_STATE || state == LEAD_NORMAL1_STATE
+              || state == LEAD_NORMAL2_STATE))
+       pthread_cond_wait(&stateCond, &stateMutex);
+     toAddr.sin_addr.s_addr = htonl(getKeyOwner(key));
+     pthread_mutex_unlock(&stateMutex);
+
+     if ((pollsock.fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+       perror("dhtRemove():socket()");
+       return -1;
+     }
+     pollsock.events = POLLIN;
+
+     outBuffer[0] = REMOVE_CMD;
+     write4(&outBuffer[1], key);
+
+     for (i = 0; i < REMOVE_RETRIES; i++) {
+       if (sendto(pollsock.fd, outBuffer, 5, 0, (struct sockaddr *)&toAddr,
+                  socklen) < 0) {
+         perror("dhtRemove():sendto()");
+         break;
+       }
+       retval = poll(&pollsock, 1, REMOVE_TIMEOUT_MS);
+       if (retval < 0) {
+         perror("dhtRemove():poll()");
+         break;
+       }
+       if (retval > 0) {
+         fromLen = sizeof(fromAddr); // value-result argument, reset each time
+         bytesRcvd = recvfrom(pollsock.fd, inBuffer, sizeof(inBuffer), 0,
+                              (struct sockaddr *)&fromAddr, &fromLen);
+         // Check the length first so fromAddr is only inspected after a
+         // successful receive.
+         if (bytesRcvd == 2 && inBuffer[0] == REMOVE_RES
+             && fromAddr.sin_addr.s_addr == toAddr.sin_addr.s_addr
+             && fromAddr.sin_port == toAddr.sin_port) {
+           status = inBuffer[1]; //status from remote host
+           break;
+         }
+       }
+     }
+
+     // A socket is opened for every round, so release it before the next
+     // round (or before returning) instead of leaking one per retry.
+     close(pollsock.fd);
+
+     if (!(status == OPERATION_OK || status == KEY_NOT_FOUND)) {
+       pthread_mutex_lock(&stateMutex);
+       setState(REBUILD0_STATE);
+       udpSend(&rebuildMsg, 1, leader);
+       pthread_mutex_unlock(&stateMutex);
+     }
+   }
+
+   return status;
+ }
+
+ // Remove numKeys keys one at a time through dhtRemove(). Every key is
+ // attempted; returns 0 when all removals returned 0 and -1 when at least one
+ // did not.
+ int dhtRemoveMult(unsigned int numKeys, unsigned int *keys) {
+   unsigned int k;
+   int anyFailed = 0;
+
+   for (k = 0; k < numKeys; k++) {
+     int rc = dhtRemove(keys[k]);
+     if (rc != 0)
+       anyFailed = 1;
+   }
+
+   return anyFailed ? -1 : 0;
+ }
+
+ /*
+  * Look up `key` in the DHT, blocking until the owning host answers.
+  *
+  * Waits until the block table is non-empty, sends SEARCH_CMD to the host that
+  * currently owns the key and retries up to SEARCH_RETRIES times, waiting
+  * SEARCH_TIMEOUT_MS for each reply. If neither OPERATION_OK nor KEY_NOT_FOUND
+  * is obtained, a rebuild is requested from the leader and the sequence
+  * repeats.
+  *
+  * val - receives the value carried in the reply (written for every accepted
+  *       SEARCH_RES, whatever its status)
+  *
+  * Returns OPERATION_OK or KEY_NOT_FOUND, or -1 if a socket cannot be created.
+  */
+ int dhtSearch(unsigned int key, unsigned int *val) {
+   struct sockaddr_in toAddr;
+   struct sockaddr_in fromAddr;
+   socklen_t socklen = sizeof(struct sockaddr_in);
+   socklen_t fromLen;
+   struct pollfd pollsock;
+   unsigned char inBuffer[6];
+   unsigned char outBuffer[5];
+   unsigned char rebuildMsg = REBUILD_REQ;
+   ssize_t bytesRcvd;
+   int i;
+   int retval;
+   int status = -1;
+
+   bzero((char *)&toAddr, socklen);
+   toAddr.sin_family = AF_INET;
+   toAddr.sin_port = htons(UDP_PORT);
+
+   while (!(status == OPERATION_OK || status == KEY_NOT_FOUND)) {
+     pthread_mutex_lock(&stateMutex);
+     while (numBlocks == 0)
+       pthread_cond_wait(&stateCond, &stateMutex);
+     toAddr.sin_addr.s_addr = htonl(getKeyOwner(key));
+     pthread_mutex_unlock(&stateMutex);
+
+     if ((pollsock.fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+       perror("dhtSearch():socket()");
+       return -1;
+     }
+     pollsock.events = POLLIN;
+
+     outBuffer[0] = SEARCH_CMD;
+     write4(&outBuffer[1], key);
+
+     for (i = 0; i < SEARCH_RETRIES; i++) {
+       if (sendto(pollsock.fd, outBuffer, 5, 0, (struct sockaddr *)&toAddr,
+                  socklen) < 0) {
+         perror("dhtSearch():sendto()");
+         break;
+       }
+       retval = poll(&pollsock, 1, SEARCH_TIMEOUT_MS);
+       if (retval < 0) {
+         perror("dhtSearch():poll()");
+         break;
+       }
+       if (retval > 0) {
+         fromLen = sizeof(fromAddr); // value-result argument, reset each time
+         bytesRcvd = recvfrom(pollsock.fd, inBuffer, sizeof(inBuffer), 0,
+                              (struct sockaddr *)&fromAddr, &fromLen);
+         // Check the length first so fromAddr is only inspected after a
+         // successful receive.
+         if (bytesRcvd == 6 && inBuffer[0] == SEARCH_RES
+             && fromAddr.sin_addr.s_addr == toAddr.sin_addr.s_addr
+             && fromAddr.sin_port == toAddr.sin_port) {
+           status = inBuffer[1]; //status from remote host
+           *val = read4(&inBuffer[2]);
+           break;
+         }
+       }
+     }
+
+     // A socket is opened for every round, so release it before the next
+     // round (or before returning) instead of leaking one per retry.
+     close(pollsock.fd);
+
+     if (!(status == OPERATION_OK || status == KEY_NOT_FOUND)) {
+       pthread_mutex_lock(&stateMutex);
+       setState(REBUILD0_STATE);
+       udpSend(&rebuildMsg, 1, leader);
+       pthread_mutex_unlock(&stateMutex);
+     }
+   }
+
+   return status;
+ }
+
+ // Look up numKeys keys one at a time through dhtSearch(), storing the result
+ // for keys[k] in vals[k]. Every key is attempted; returns 0 when all searches
+ // returned 0 and -1 when at least one did not.
+ int dhtSearchMult(unsigned int numKeys, unsigned int *keys, unsigned int *vals) {
+   unsigned int k;
+   int anyFailed = 0;
+
+   for (k = 0; k < numKeys; k++) {
+     int rc = dhtSearch(keys[k], &vals[k]);
+     if (rc != 0)
+       anyFailed = 1;
+   }
+
+   return anyFailed ? -1 : 0;
+ }
+
+/*******************************************************************************
+* Local Function Definitions
+*******************************************************************************/
+
+ /*
+  * Check that a received message has exactly the length its type requires.
+  *
+  * msg  - raw datagram; msg[0] is the message type
+  * size - number of bytes received
+  *
+  * Returns nonzero when the size is acceptable and 0 otherwise. A message
+  * that is too short to hold the count fields needed to compute its expected
+  * length, or whose type is not recognized, is rejected (these cases used to
+  * be reported as acceptable).
+  */
+ int msgSizeOk(unsigned char *msg, unsigned int size) {
+   unsigned short tmpNumHosts;
+   unsigned short tmpNumBlocks;
+
+   if (size < 1)
+     return 0; // no type byte at all
+
+   switch (msg[0]) {
+   // type byte only
+   case WHO_IS_LEADER_CMD:
+   case LEAVE_REQ:
+   case LEAVE_RES:
+   case DHT_UPDATE_RES:
+   case REBUILD_REQ:
+   case REBUILD_CMD:
+   case FILL_DHT_CMD:
+   case FILL_DHT_RES:
+   case RESUME_NORMAL_CMD:
+   case RESUME_NORMAL_RES:
+     return (size == 1);
+
+   // type + 1-byte status
+   case INSERT_RES:
+   case REMOVE_RES:
+   case JOIN_RES:
+     return (size == 2);
+
+   // type + one 4-byte field
+   case REMOVE_CMD:
+   case SEARCH_CMD:
+   case WHO_IS_LEADER_RES:
+   case JOIN_REQ:
+   case ELECT_LEADER_CMD:
+     return (size == 5);
+
+   // type + status + 4-byte value
+   case SEARCH_RES:
+     return (size == 6);
+
+   // type + 4-byte key + 4-byte value
+   case INSERT_CMD:
+     return (size == 9);
+
+   // type + host count + block count, then the host and block tables
+   case DHT_UPDATE_CMD:
+     if (size < 5)
+       return 0; // counts are missing
+     tmpNumHosts = read2(&msg[1]);
+     tmpNumBlocks = read2(&msg[3]);
+     return (size == (5+sizeof(struct hostData)*tmpNumHosts+2*tmpNumBlocks));
+
+   case ELECT_LEADER_RES:
+     if (size < 2)
+       return 0;
+     if (msg[1] == 0xFF) // short form: nothing follows the second byte
+       return (size == 2);
+     if (size < 4)
+       return 0; // host count is missing
+     tmpNumHosts = read2(&msg[2]);
+     return (size == (4 + sizeof(struct hostData) * tmpNumHosts));
+
+   case CONGRATS_CMD:
+     if (size < 3)
+       return 0; // host count is missing
+     tmpNumHosts = read2(&msg[1]);
+     return (size == (3 + sizeof(struct hostData) * tmpNumHosts));
+
+   default:
+     return 0; // unknown message type
+   }
+ }
+
+ // Decode a 16-bit value stored least-significant byte first at ptr.
+ unsigned short read2(unsigned char *ptr) {
+   unsigned short low = ptr[0];
+   unsigned short high = ptr[1];
+
+   return (unsigned short)((high << 8) | low);
+ }
+
+ // Decode a 32-bit value stored least-significant byte first at ptr.
+ // Each byte is widened to unsigned int before shifting: an unsigned char
+ // promotes to (signed) int, and shifting a byte >= 0x80 left by 24 would
+ // move a one into the sign bit.
+ unsigned int read4(unsigned char *ptr) {
+   return ((unsigned int)ptr[3] << 24)
+        | ((unsigned int)ptr[2] << 16)
+        | ((unsigned int)ptr[1] << 8)
+        | (unsigned int)ptr[0];
+ }
+
+ // Encode the 16-bit value tmp at ptr, least-significant byte first.
+ void write2(unsigned char *ptr, unsigned short tmp) {
+   unsigned int byteIdx;
+
+   for (byteIdx = 0; byteIdx < 2; byteIdx++)
+     ptr[byteIdx] = (tmp >> (8 * byteIdx)) & 0xFF;
+ }
+
+ // Encode the 32-bit value tmp at ptr, least-significant byte first.
+ void write4(unsigned char *ptr, unsigned int tmp) {
+   unsigned int byteIdx;
+
+   for (byteIdx = 0; byteIdx < 4; byteIdx++)
+     ptr[byteIdx] = (tmp >> (8 * byteIdx)) & 0xFF;
+ }
+
+ /*
+  * Return the IPv4 address of the named network interface in host byte
+  * order, queried with the SIOCGIFADDR ioctl.
+  *
+  * interfaceStr - interface name, e.g. "eth0"; must fit in ifr_name
+  *
+  * Returns 1 on any error (name too long, socket() or ioctl() failure), as
+  * before. The helper socket is now closed on every path.
+  */
+ unsigned int getMyIpAddr(const char *interfaceStr) {
+   int sock;
+   unsigned int addr;
+   struct ifreq interfaceInfo;
+   struct sockaddr_in *myAddr = (struct sockaddr_in *)&interfaceInfo.ifr_addr;
+
+   memset(&interfaceInfo, 0, sizeof(struct ifreq));
+
+   // ifr_name is a fixed-size field; refuse names that would not fit
+   // together with their terminating NUL.
+   if (strlen(interfaceStr) >= sizeof(interfaceInfo.ifr_name)) {
+     fprintf(stderr, "getMyIpAddr(): interface name too long\n");
+     return 1;
+   }
+
+   if((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+     perror("getMyIpAddr():socket()");
+     return 1;
+   }
+
+   strcpy(interfaceInfo.ifr_name, interfaceStr);
+   myAddr->sin_family = AF_INET;
+
+   if(ioctl(sock, SIOCGIFADDR, &interfaceInfo) != 0) {
+     perror("getMyIpAddr():ioctl()");
+     close(sock);
+     return 1;
+   }
+
+   addr = ntohl(myAddr->sin_addr.s_addr);
+   close(sock); // the socket was only needed for the ioctl
+   return addr;
+ }
+
+ // Send `size` bytes of `msg` to UDP_PORT on host destIp (host byte order)
+ // through the module's UDP socket, logging the message type first.
+ // Returns 0 on success and -1 when sendto() fails.
+ int udpSend(unsigned char *msg, unsigned int size, unsigned int destIp) {
+   struct sockaddr_in dest;
+   socklen_t destLen = sizeof(struct sockaddr_in);
+
+   memset(&dest, 0, destLen);
+   dest.sin_family = AF_INET;
+   dest.sin_port = htons(UDP_PORT);
+   dest.sin_addr.s_addr = htonl(destIp);
+
+   // Nothing is logged for an empty message (there is no type byte to name).
+   if (size >= 1 && msg[0] < NUM_MSG_TYPES) {
+     dhtLog("udpSend(): sending %s to %s, %d bytes\n", msg_types[msg[0]],
+            inet_ntoa(dest.sin_addr), size);
+   } else if (size >= 1) {
+     dhtLog("udpSend(): sending unknown message to %s, %d bytes\n",
+            inet_ntoa(dest.sin_addr), size);
+   }
+
+   if (sendto(udpPollSock.fd, (void *)msg, size, 0, (struct sockaddr *)&dest,
+              destLen) >= 0)
+     return 0;
+
+   perror("udpSend():sendto()");
+   return -1;
+ }
+
+ // Send the message to every known host that has not replied yet, skipping
+ // this host itself. All eligible hosts are attempted; returns 0 when every
+ // send succeeded and -1 when at least one failed.
+ int udpSendAll(unsigned char *msg, unsigned int size) {
+   unsigned int h;
+   int anyFailed = 0;
+
+   for (h = 0; h < numHosts; h++) {
+     if (hostReplied[h] != 0)
+       continue; // already answered in this state
+     if (hostArray[h].ipAddr == myHostData.ipAddr)
+       continue; // never send to ourselves
+     if (udpSend(msg, size, hostArray[h].ipAddr) != 0)
+       anyFailed = 1;
+   }
+
+   return anyFailed ? -1 : 0;
+ }
+
+ // Map x to a block index in [0, numBlocks).
+ // Must only be called in a state where numBlocks != 0.
+ unsigned int hash(unsigned int x) {
+   unsigned int block = x % numBlocks;
+
+   return block;
+ }
+
+ // Return the address of the host that owns the block `key` hashes to.
+ // Must only be called in a state where hostArray and blockOwnerArray are
+ // allocated and their index mappings are consistent.
+ unsigned int getKeyOwner(unsigned int key) {
+   unsigned int block = hash(key);
+   unsigned short ownerIdx = blockOwnerArray[block];
+
+   return hostArray[ownerIdx].ipAddr;
+ }
+
+ // Enter newState: arm or disarm the state timer from timeout_vals, reset the
+ // timeout counter, clear every host's replied flag and log the new state.
+ // An out-of-range newState is logged and otherwise ignored.
+ void setState(unsigned int newState) {
+   struct timeval now;
+   struct timeval *period;
+   unsigned int h;
+
+   gettimeofday(&now, NULL);
+
+   if (newState >= NUM_STATES) {
+     dhtLog("setState(): ERROR: invalid state %d\n", newState);
+     return;
+   }
+
+   // A { 0, 0 } table entry means this state has no timeout.
+   period = &timeout_vals[newState];
+   timerSet = !(period->tv_sec == 0 && period->tv_usec == 0);
+   if (timerSet)
+     timeradd(&now, period, &timer);
+
+   timeoutCntr = 0;
+   state = newState;
+
+   //TODO: only do this for states that require it
+   for (h = 0; h < numHosts; h++)
+     hostReplied[h] = 0;
+
+   dhtLog("setState(): state set to %s\n", state_names[state]);
+ }
+
+//TODO: improve these simple and inefficient functions
+ // Mark the host with address ipAddr as having replied.
+ // Returns 0 on success, -1 when the host is not in hostArray.
+ int checkReplied(unsigned int ipAddr) {
+   int slot = findHost(ipAddr);
+
+   if (slot != -1) {
+     hostReplied[slot] = 1;
+     return 0;
+   }
+
+   return -1;
+ }
+
+ // Returns 1 when every known host other than this one has its replied flag
+ // set, 0 as soon as one such host is still pending.
+ int allReplied() {
+   unsigned int h;
+
+   for (h = 0; h < numHosts; h++) {
+     int pending = (hostReplied[h] == 0);
+     int remote = (hostArray[h].ipAddr != myHostData.ipAddr);
+
+     if (pending && remote)
+       return 0;
+   }
+
+   return 1;
+ }
+
+ // Linear search of hostArray for ipAddr.
+ // Returns the index of the first matching entry, or -1 when there is none.
+ int findHost(unsigned int ipAddr) {
+   unsigned int idx = 0;
+
+   while (idx < numHosts) {
+     if (hostArray[idx].ipAddr == ipAddr)
+       return (int)idx; //found, return index
+     idx++;
+   }
+
+   return -1; //not found
+ }
+
+ // Remove the host with address ipAddr from hostArray/hostReplied and fix up
+ // blockOwnerArray: blocks owned by the removed host are handed to index 0,
+ // and owner indices above the removed slot move down by one.
+ // Returns 0 on success, -1 when the host is not known.
+ int removeHost(unsigned int ipAddr) {
+   int victim = findHost(ipAddr);
+   unsigned int blk;
+   unsigned int tail;
+
+   if (victim == -1)
+     return -1;
+
+   for (blk = 0; blk < numBlocks; blk++) {
+     unsigned short owner = blockOwnerArray[blk];
+
+     if (owner == victim)
+       blockOwnerArray[blk] = 0; //TODO: is this what I want to have happen?
+     else if (owner > victim)
+       blockOwnerArray[blk] = owner - 1;
+   }
+
+   // Close the gap by sliding the entries after the victim down one slot.
+   tail = numHosts - 1 - victim;
+   memmove(&hostArray[victim], &hostArray[victim + 1],
+           tail * sizeof(struct hostData));
+   memmove(&hostReplied[victim], &hostReplied[victim + 1],
+           tail * sizeof(unsigned char));
+   numHosts--;
+
+   return 0;
+ }
+
+ // Remove every host (other than this one) whose replied flag is clear.
+ // removeHost() shifts the following entries down into the freed slot, so the
+ // index is advanced only when nothing was removed; otherwise the host that
+ // moved into slot i would be skipped.
+ void removeUnresponsiveHosts() {
+   unsigned int i = 0;
+
+   while (i < numHosts) {
+     if (!hostReplied[i] && hostArray[i].ipAddr != myHostData.ipAddr
+         && removeHost(hostArray[i].ipAddr) == 0)
+       continue; // slot i now holds the next host; examine it again
+     i++;
+   }
+ }
+
+ // Double the capacity of hostArray/hostReplied (or allocate INIT_HOST_ALLOC
+ // entries if nothing is allocated yet), keeping the first numHosts entries.
+ // Returns 0 on success; on allocation failure returns -1 and leaves both
+ // arrays untouched.
+ static int growHostArrays(void) {
+   unsigned int newSize = hostArraySize ? 2 * hostArraySize : INIT_HOST_ALLOC;
+   struct hostData *newHostArray = calloc(newSize, sizeof(struct hostData));
+   unsigned char *newHostReplied = calloc(newSize, sizeof(unsigned char));
+
+   if (newHostArray == NULL || newHostReplied == NULL) {
+     free(newHostArray);
+     free(newHostReplied);
+     return -1;
+   }
+
+   if (numHosts > 0) {
+     memcpy(newHostArray, hostArray, numHosts * sizeof(struct hostData));
+     memcpy(newHostReplied, hostReplied, numHosts * sizeof(unsigned char));
+   }
+   free(hostArray);
+   free(hostReplied);
+   hostArray = newHostArray;
+   hostReplied = newHostReplied;
+   hostArraySize = newSize;
+   return 0;
+ }
+
+ /*
+  * Add newHost to hostArray, keeping the array ordered by ascending ipAddr.
+  *
+  * If a host with the same address is already present its entry is replaced
+  * and its replied flag cleared. Otherwise the host is inserted before the
+  * first entry with a larger address (or appended), and owner indices in
+  * blockOwnerArray at or after the insertion point are shifted up by one so
+  * they keep referring to the same hosts.
+  *
+  * Returns 0 when an existing entry was updated, 1 when a new entry was
+  * added, and -1 when the arrays could not be grown (nothing is changed).
+  */
+ int addHost(struct hostData newHost) {
+   unsigned int pos;
+   unsigned int j;
+
+   // First entry whose address is >= the new one (numHosts if there is none).
+   for (pos = 0; pos < numHosts; pos++) {
+     if (hostArray[pos].ipAddr >= newHost.ipAddr)
+       break;
+   }
+
+   if (pos < numHosts && hostArray[pos].ipAddr == newHost.ipAddr) {
+     hostArray[pos] = newHost;
+     hostReplied[pos] = 0;
+     return 0;
+   }
+
+   if (numHosts == hostArraySize && growHostArrays() != 0) {
+     dhtLog("addHost(): ERROR: out of memory\n");
+     return -1;
+   }
+
+   // Open a gap at pos (a zero-length move when appending).
+   memmove(&hostArray[pos + 1], &hostArray[pos],
+           (numHosts - pos) * sizeof(struct hostData));
+   memmove(&hostReplied[pos + 1], &hostReplied[pos],
+           (numHosts - pos) * sizeof(unsigned char));
+   hostArray[pos] = newHost;
+   hostReplied[pos] = 0;
+
+   // Only an insertion in front of existing hosts renumbers owners.
+   if (pos < numHosts) {
+     for (j = 0; j < numBlocks; j++) {
+       if (blockOwnerArray[j] >= pos)
+         blockOwnerArray[j]++;
+     }
+   }
+
+   numHosts++;
+   return 1;
+ }
+
+ // Assign every block to a host round-robin (block b -> host b % numHosts).
+ // If there are fewer blocks than hosts, numBlocks is first doubled until it
+ // is at least numHosts and blockOwnerArray is reallocated to that size.
+ void makeAssignments() {
+   unsigned int b;
+
+   if (numBlocks < numHosts) {
+     free(blockOwnerArray);
+     do {
+       numBlocks *= 2;
+     } while (numBlocks < numHosts);
+     blockOwnerArray = calloc(numBlocks, sizeof(unsigned short));
+   }
+
+   for (b = 0; b < numBlocks; b++)
+     blockOwnerArray[b] = b % numHosts;
+ }
+
+ // Write the host count followed by one line per known host (index, dotted
+ // address, capacity) to the log file.
+ void writeHostList() {
+   unsigned int h = 0;
+
+   fprintf(logfile, "numHosts = %d\n", numHosts);
+
+   while (h < numHosts) {
+     struct in_addr dotted;
+
+     dotted.s_addr = htonl(hostArray[h].ipAddr);
+     fprintf(logfile, "%d) %s, %d\n", h, inet_ntoa(dotted),
+             hostArray[h].maxKeyCapacity);
+     h++;
+   }
+ }
+
+ // printf-style logging to the log file; the stream is flushed after every
+ // message. Failures of the write or the flush are reported with perror().
+ void dhtLog(const char *format, ...) {
+   va_list ap;
+   int written;
+
+   va_start(ap, format);
+   written = vfprintf(logfile, format, ap);
+   va_end(ap);
+
+   if (written < 0)
+     perror("dhtLog():vfprintf()");
+
+   if (fflush(logfile) == EOF)
+     perror("dhtLog():fflush()");
+ }
+
+ /*
+  * Thread body that publishes this host's entries into the DHT.
+  *
+  * Obtains the list from mhashGetKeys(), pairs every element with this host's
+  * address and inserts the pairs with dhtInsertMult(). The outcome is
+  * reported through fillStatus (2 = completed, 3 = error) and the thread then
+  * exits.
+  */
+ void *fillTask() {
+   unsigned int *vals;
+   unsigned int *keys;
+   unsigned int numKeys;
+   unsigned int i;
+
+   vals = mhashGetKeys(&numKeys); //note: key of mhash is val of dht
+   keys = calloc(numKeys, sizeof(unsigned int));
+
+   if (keys == NULL && numKeys != 0) {
+     // Could not build the key array; report the fill as failed.
+     fillStatus = 3;
+     pthread_exit(NULL);
+   }
+
+   for (i = 0; i < numKeys; i++)
+     keys[i] = myHostData.ipAddr;
+
+   if (dhtInsertMult(numKeys, keys, vals) == 0)
+     fillStatus = 2;
+   else
+     fillStatus = 3;
+
+   free(keys); // scratch array owned by this function
+   // NOTE(review): ownership of `vals` depends on mhashGetKeys(), which is not
+   // visible here -- confirm whether it must be freed as well.
+
+   pthread_exit(NULL);
+ }
+
+void *udpListen() {
+ ssize_t bytesRcvd;
+ struct sockaddr_in peerAddr;
+ unsigned int peerIp;
+ socklen_t socklen = sizeof(struct sockaddr_in);
+ unsigned char inBuffer[MAX_MSG_SIZE];
+ unsigned char outBuffer[MAX_MSG_SIZE];
+ int pollret;
+ struct timeval now;
+ struct in_addr tmpAddr;
+ struct hostData tmpHost;
+ unsigned int tmpKey;
+ unsigned int tmpVal;
+ struct hostData *hostDataPtr;
+ unsigned short *uShortPtr;
+ unsigned int tmpUInt;
+ unsigned int tmpUShort;
+ int i;
+ unsigned int oldState;
+
+ dhtLog("udpListen(): linstening on port %d...\n", UDP_PORT);
+
+ while (1) {
+ pollret = poll(&udpPollSock, 1, TIMEOUT_PERIOD);
+ pthread_mutex_lock(&stateMutex);
+ oldState = state;
+ if (pollret < 0) {
+ perror("udpListen():poll()");
+ } else if (pollret > 0) {
+ bytesRcvd = recvfrom(udpPollSock.fd, inBuffer, MAX_MSG_SIZE, 0,
+ (struct sockaddr *)&peerAddr, &socklen);
+ if (bytesRcvd < 1) {
+ dhtLog("udpListen(): ERROR: bytesRcvd = %d\n", bytesRcvd);
+ } else if (inBuffer[0] >= NUM_MSG_TYPES) {
+ dhtLog("udpListen(): ERROR: unknown msg type = %d\n", inBuffer[0]);
+ } else if (!msgSizeOk(inBuffer, bytesRcvd)) {
+ dhtLog("udpListen(): ERROR: msg size not ok: type = %s\n, size = %d\n",
+ msg_types[inBuffer[0]], bytesRcvd);
+ } else if (state == EXIT2_STATE) {
+ //do nothing
+ } else if (state == INIT1_STATE) { //after initialization with seed, do not proceed until seed replies
+ dhtLog("udpListen(): received %s from %s, %d bytes\n",
+ msg_types[inBuffer[0]], inet_ntoa(peerAddr.sin_addr), bytesRcvd);
+ for (i = 0; i < bytesRcvd; i++)
+ dhtLog(" %x", inBuffer[i]);
+ dhtLog("\n");
+ peerIp = ntohl(peerAddr.sin_addr.s_addr);
+ if (peerIp == seed && inBuffer[0] == WHO_IS_LEADER_RES) {
+ tmpHost.ipAddr = peerIp;
+ tmpHost.maxKeyCapacity = 0;
+ addHost(tmpHost);
+ writeHostList();
+ leader = read4(&inBuffer[1]);
+ tmpAddr.s_addr = htonl(leader);
+ dhtLog("leader = %s\n", inet_ntoa(tmpAddr));
+ if (leader != 0) {
+ setState(INIT2_STATE);
+ outBuffer[0] = JOIN_REQ;
+ write4(&outBuffer[1], myHostData.maxKeyCapacity);
+ udpSend(outBuffer, 5, leader);
+ } else
+ {
+ electionOriginator = myHostData.ipAddr;
+ setState(ELECT1_STATE);
+ outBuffer[0] = ELECT_LEADER_CMD;
+ write4(&outBuffer[1], myHostData.ipAddr); //originator = me
+ udpSendAll(outBuffer, 5);
+ }
+ }
+ } else
+ {
+ dhtLog("udpListen(): received %s from %s, %d bytes\n",
+ msg_types[inBuffer[0]], inet_ntoa(peerAddr.sin_addr), bytesRcvd);
+ for (i = 0; i < bytesRcvd; i++)
+ dhtLog(" %x", inBuffer[i]);
+ dhtLog("\n");
+ peerIp = ntohl(peerAddr.sin_addr.s_addr);
+ switch (inBuffer[0]) {
+ case INSERT_CMD:
+ if (state == NORMAL_STATE || state == LEAD_NORMAL1_STATE
+ || state == LEAD_NORMAL2_STATE || state == REBUILD4_STATE
+ || state == REBUILD5_STATE || state == LEAD_REBUILD3_STATE) {
+ tmpKey = read4(&inBuffer[1]);
+ tmpVal = read4(&inBuffer[5]);
+ outBuffer[0] = INSERT_RES;
+ if (getKeyOwner(tmpKey) == myHostData.ipAddr) {
+ if (chashInsert(myHashTable, tmpKey, (void *)tmpVal) == 0)
+ outBuffer[1] = OPERATION_OK;
+ else
+ outBuffer[1] = INTERNAL_ERROR;
+ } else
+ {
+ outBuffer[1] = NOT_KEY_OWNER;
+ }
+ //reply to client socket
+ sendto(udpPollSock.fd, outBuffer, 2, 0,
+ (struct sockaddr *)&peerAddr, socklen);
+ }
+ break;
+
+ case REMOVE_CMD:
+ if (state == NORMAL_STATE || state == LEAD_NORMAL1_STATE
+ || state == LEAD_NORMAL2_STATE) {
+ tmpKey = read4(&inBuffer[1]);
+ outBuffer[0] = REMOVE_RES;
+ if (getKeyOwner(tmpKey) == myHostData.ipAddr) {
+ if (chashRemove(myHashTable, tmpKey) == 0)
+ outBuffer[1] = OPERATION_OK;
+ else
+ outBuffer[1] = KEY_NOT_FOUND;
+ } else
+ {
+ outBuffer[1] = NOT_KEY_OWNER;
+ }
+ //reply to client socket
+ sendto(udpPollSock.fd, outBuffer, 2, 0,
+ (struct sockaddr *)&peerAddr, socklen);
+ }
+ break;
+
+ case SEARCH_CMD:
+ if (state == NORMAL_STATE || state == LEAD_NORMAL1_STATE
+ || state == LEAD_NORMAL2_STATE) {
+ tmpKey = read4(&inBuffer[1]);
+ outBuffer[0] = SEARCH_RES;
+ if (getKeyOwner(tmpKey) == myHostData.ipAddr) {
+ if ((tmpVal = (unsigned int)chashSearch(myHashTable, tmpKey)) != 0) {
+ outBuffer[1] = OPERATION_OK;
+ write4(&outBuffer[2], tmpVal);
+ } else
+ {
+ outBuffer[1] = KEY_NOT_FOUND;
+ write4(&outBuffer[2], 0);
+ }
+ } else
+ {
+ outBuffer[1] = NOT_KEY_OWNER;
+ write4(&outBuffer[2], 0);
+ }
+ //reply to client socket
+ sendto(udpPollSock.fd, outBuffer, 6, 0,
+ (struct sockaddr *)&peerAddr, socklen);
+ }
+ break;
+
+ case WHO_IS_LEADER_CMD:
+ tmpHost.ipAddr = peerIp;
+ tmpHost.maxKeyCapacity = 0;
+ addHost(tmpHost);
+ writeHostList();
+ outBuffer[0] = WHO_IS_LEADER_RES;
+ //leader == 0 means I don't know who it is
+ write4(&outBuffer[1], leader);
+ udpSend(outBuffer, 5, peerIp);
+ break;
+
+ case JOIN_REQ:
+ if (state == LEAD_NORMAL1_STATE || state == LEAD_NORMAL2_STATE) {
+ tmpHost.ipAddr = peerIp;
+ tmpHost.maxKeyCapacity = read4(&inBuffer[1]);
+ addHost(tmpHost);
+ writeHostList();
+ if (state == LEAD_NORMAL1_STATE)
+ setState(LEAD_NORMAL2_STATE);
+ outBuffer[0] = JOIN_RES;
+ outBuffer[1] = 0; //status, success
+ udpSend(outBuffer, 2, peerIp);
+ } else if (state == LEAD_REBUILD1_STATE) {
+ //note: I don't need to addHost().
+ checkReplied(peerIp);
+ outBuffer[0] = JOIN_RES;
+ outBuffer[1] = 0; //status, success
+ udpSend(outBuffer, 2, peerIp);
+ if (allReplied()) {
+ makeAssignments();
+ setState(LEAD_REBUILD2_STATE);
+ outBuffer[0] = DHT_UPDATE_CMD;
+ write2(&outBuffer[1], numHosts);
+ write2(&outBuffer[3], numBlocks);
+ memcpy(&outBuffer[5], hostArray, numHosts*sizeof(struct hostData));
+ memcpy(&outBuffer[5+numHosts*sizeof(struct hostData)],
+ blockOwnerArray, numBlocks*2);
+ udpSendAll(outBuffer, 5 + sizeof(struct hostData) * numHosts
+ + 2 * numBlocks);
+ }
+ }
+ break;
+
+ case JOIN_RES:
+ if (state == REBUILD1_STATE) {
+ setState(REBUILD2_STATE);
+ } else if (state == INIT2_STATE) {
+ setState(NORMAL_STATE);
+ }
+ break;
+
+ case LEAVE_REQ:
+ if (state == LEAD_NORMAL1_STATE || state == LEAD_NORMAL2_STATE) { //TODO: make this graceful, instead of just rebuilding
+ removeHost(peerIp);
+ if (state != LEAD_NORMAL2_STATE)
+ setState(LEAD_NORMAL2_STATE);
+ }
+ break;
+
+ case DHT_UPDATE_CMD:
+ if (state == REBUILD2_STATE && peerIp == leader) {
+ free(hostArray);
+ free(blockOwnerArray);
+ numHosts = read2(&inBuffer[1]);
+ numBlocks = read2(&inBuffer[3]);
+ while (hostArraySize < numHosts)
+ hostArraySize *= 2;
+ hostArray = calloc(hostArraySize, sizeof(struct hostData));
+ blockOwnerArray = calloc(numBlocks, 2);
+ memcpy(hostArray, &inBuffer[5], numHosts*sizeof(struct hostData));
+ memcpy(blockOwnerArray, &inBuffer[5+numHosts*sizeof(struct hostData)], numBlocks*2);
+ writeHostList();
+ setState(REBUILD3_STATE);
+ outBuffer[0] = DHT_UPDATE_RES;
+ udpSend(outBuffer, 1, peerIp);
+ }
+ break;
+
+ case DHT_UPDATE_RES:
+ if (state == LEAD_REBUILD2_STATE) {
+ checkReplied(peerIp);
+ if (allReplied()) {
+ setState(LEAD_REBUILD3_STATE);
+ outBuffer[0] = FILL_DHT_CMD;
+ udpSendAll(outBuffer, 1);
+ if (fillStatus != 0)
+ dhtLog("udpListen(): ERROR: fillTask already running\n");
+ fillStatus = 1;
+ if (pthread_create(&threadFillTask, NULL, fillTask, NULL) != 0)
+ dhtLog("udpListen(): ERROR creating threadFillTask\n");
+ }
+ }
+ break;
+
+ case ELECT_LEADER_CMD:
+ tmpUInt = read4(&inBuffer[1]);
+ if ((state == ELECT1_STATE || state == ELECT2_STATE)
+ && tmpUInt >= electionOriginator) { //already participating in a higher-priority election
+ outBuffer[0] = ELECT_LEADER_RES;
+ outBuffer[1] = 0xFF;
+ udpSend(outBuffer, 2, peerIp);
+ } else
+ { //join election
+ electionOriginator = tmpUInt;
+ electionParent = peerIp;
+ setState(ELECT1_STATE);
+ outBuffer[0] = ELECT_LEADER_CMD;
+ write4(&outBuffer[1], electionOriginator);
+ //don't bother forwarding the message to originator or parent
+ checkReplied(electionOriginator);
+ checkReplied(electionParent);
+ if (allReplied()) { //in case that is everybody I know of
+ setState(ELECT2_STATE);
+ outBuffer[0] = ELECT_LEADER_RES;
+ outBuffer[1] = 0;
+ write2(&outBuffer[2], numHosts);
+ memcpy(&outBuffer[4], hostArray, sizeof(struct hostData)
+ * numHosts);
+ udpSend(outBuffer, 4 + sizeof(struct hostData) * numHosts,
+ electionParent);
+ } else
+ {
+ udpSendAll(outBuffer, 5);
+ }
+ }
+ break;
+
+ case ELECT_LEADER_RES:
+ if (state == ELECT1_STATE) {
+ checkReplied(peerIp);
+ if (inBuffer[1] != 0xFF) {
+ tmpUShort = read2(&inBuffer[2]);
+ hostDataPtr = (struct hostData *)&inBuffer[4];
+ for (i = 0; i < tmpUShort; i++)
+ addHost(hostDataPtr[i]);
+ writeHostList();
+ }
+ if (allReplied()) {
+ setState(ELECT2_STATE);
+ if (electionOriginator == myHostData.ipAddr) {
+ leader = hostArray[0].ipAddr;
+ if (leader == myHostData.ipAddr) { //I am the leader
+ dhtLog("I am the leader!\n");
+ setState(LEAD_REBUILD1_STATE);
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ } else
+ { //notify leader
+ outBuffer[0] = CONGRATS_CMD;
+ write2(&outBuffer[1], numHosts);
+ hostDataPtr = (struct hostData *)&outBuffer[3];
+ for (i = 0; i < numHosts; i++)
+ hostDataPtr[i] = hostArray[i];
+ udpSend(outBuffer, 3 + sizeof(struct hostData) * numHosts,
+ leader);
+ }
+ } else
+ {
+ outBuffer[0] = ELECT_LEADER_RES;
+ outBuffer[1] = 0;
+ write2(&outBuffer[2], numHosts);
+ hostDataPtr = (struct hostData *)&outBuffer[4];
+ for (i = 0; i < numHosts; i++)
+ hostDataPtr[i] = hostArray[i];
+ udpSend(outBuffer, 4 + sizeof(struct hostData) * numHosts,
+ electionParent);
+ }
+ }
+ }
+ break;
+
+ case CONGRATS_CMD:
+ if (state == ELECT2_STATE) { //I am the leader
+ leader = myHostData.ipAddr;
+ dhtLog("I am the leader!\n");
+ tmpUShort = read2(&inBuffer[1]);
+ hostDataPtr = (struct hostData *)&inBuffer[3];
+ for (i = 0; i < tmpUShort; i++)
+ addHost(hostDataPtr[i]);
+ writeHostList();
+ setState(LEAD_REBUILD1_STATE);
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ }
+ break;
+
+ case REBUILD_REQ:
+ if (state == LEAD_NORMAL1_STATE || state == LEAD_NORMAL2_STATE) {
+ setState(LEAD_REBUILD1_STATE);
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ }
+ break;
+
+ case REBUILD_CMD:
+ leader = peerIp; //consider this a declaration of authority
+ setState(REBUILD1_STATE);
+ outBuffer[0] = JOIN_REQ;
+ write4(&outBuffer[1], myHostData.maxKeyCapacity);
+ udpSend(outBuffer, 5, leader);
+ break;
+
+ case FILL_DHT_CMD:
+ if (state == REBUILD3_STATE && peerIp == leader) {
+ setState(REBUILD4_STATE);
+ if (fillStatus != 0)
+ dhtLog("udpListen(): ERROR: fillTask already running\n");
+ fillStatus = 1;
+ if (pthread_create(&threadFillTask, NULL, fillTask, NULL) != 0)
+ dhtLog("udpListen(): ERROR creating threadFillTask\n");
+ }
+ break;
+
+ case FILL_DHT_RES:
+ if (state == LEAD_REBUILD3_STATE) {
+ checkReplied(peerIp);
+ if (allReplied() && fillStatus == 2) {
+ fillStatus = 0;
+ setState(LEAD_REBUILD4_STATE);
+ outBuffer[0] = RESUME_NORMAL_CMD;
+ udpSendAll(outBuffer, 1);
+ }
+ }
+ break;
+
+ case RESUME_NORMAL_CMD:
+ if (state == REBUILD5_STATE && peerIp == leader) {
+ setState(NORMAL_STATE);
+ outBuffer[0] = RESUME_NORMAL_RES;
+ udpSend(outBuffer, 1, leader);
+ }
+ break;
+
+ case RESUME_NORMAL_RES:
+ if (state == LEAD_REBUILD4_STATE) {
+ checkReplied(peerIp);
+ if (allReplied()) {
+ setState(LEAD_NORMAL1_STATE);
+ }
+ }
+ break;
+ }
+ }
+ }
+ if (state == REBUILD4_STATE) {
+ switch (fillStatus) {
+ case 0: dhtLog("udpListen(): ERROR: fillStatus=0 in REBUILD4_STATE\n");
+ break;
+
+ case 1: //do nothing
+ break;
+
+ case 2: //done filling the dht, notify leader
+ fillStatus = 0;
+ setState(REBUILD5_STATE);
+ outBuffer[0] = FILL_DHT_RES;
+ udpSend(outBuffer, 1, leader);
+ break;
+
+ case 3: //error encountered -> restart rebuild
+ fillStatus = 0;
+ setState(REBUILD0_STATE);
+ outBuffer[0] = REBUILD_REQ;
+ udpSend(outBuffer, 1, leader);
+ break;
+ }
+ }
+ if (state == LEAD_REBUILD3_STATE) {
+ switch (fillStatus) {
+ case 0: dhtLog("udpListen(): ERROR: fillStatus=0 in LEAD_REBUILD3_STATE\n");
+ break;
+
+ case 1: //do nothing
+ break;
+
+ case 2: //I'm done, now is everybody else also done?
+ if (allReplied()) {
+ fillStatus = 0;
+ setState(LEAD_REBUILD4_STATE);
+ outBuffer[0] = RESUME_NORMAL_CMD;
+ udpSendAll(outBuffer, 1);
+ }
+ break;
+
+ case 3: //error encountered -> restart rebuild
+ fillStatus = 0;
+ setState(LEAD_REBUILD1_STATE);
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ break;
+ }
+ }
+ if (timerSet) {
+ gettimeofday(&now, NULL);
+ if (timercmp(&now, &timer, >)) {
+ if (timeoutCntr < retry_vals[state]) {
+ timeoutCntr++;
+ timeradd(&now, &timeout_vals[state], &timer);
+ dhtLog("udpListen(): retry: %d\n", timeoutCntr);
+ switch (state) {
+ case INIT1_STATE:
+ outBuffer[0] = WHO_IS_LEADER_CMD;
+ udpSend(outBuffer, 1, seed);
+ break;
+
+ case INIT2_STATE:
+ outBuffer[0] = JOIN_REQ;
+ write4(&outBuffer[1], myHostData.maxKeyCapacity);
+ udpSend(outBuffer, 5, leader);
+ break;
+
+ case ELECT1_STATE:
+ outBuffer[0] = ELECT_LEADER_CMD;
+ write4(&outBuffer[1], electionOriginator);
+ udpSendAll(outBuffer, 5);
+ break;
+
+ case ELECT2_STATE:
+ if (electionOriginator == myHostData.ipAddr) { //retry notify leader
+ outBuffer[0] = CONGRATS_CMD;
+ write2(&outBuffer[1], numHosts);
+ memcpy(&outBuffer[3], hostArray, sizeof(struct hostData)
+ * numHosts);
+ udpSend(outBuffer, 3 + sizeof(struct hostData) * numHosts,
+ leader);
+ } else
+ {
+ outBuffer[0] = ELECT_LEADER_RES;
+ outBuffer[1] = 0;
+ write2(&outBuffer[2], numHosts);
+ memcpy(&outBuffer[4], hostArray, sizeof(struct hostData)
+ * numHosts);
+ udpSend(outBuffer, 4 + sizeof(struct hostData) * numHosts,
+ electionParent);
+ }
+ break;
+
+ case REBUILD0_STATE:
+ outBuffer[0] = REBUILD_REQ;
+ udpSend(outBuffer, 1, leader);
+ break;
+
+ case REBUILD1_STATE:
+ outBuffer[0] = JOIN_REQ;
+ write4(&outBuffer[1], myHostData.maxKeyCapacity);
+ udpSend(outBuffer, 5, leader);
+ break;
+
+ case REBUILD5_STATE:
+ outBuffer[0] = FILL_DHT_RES;
+ udpSend(outBuffer, 1, leader);
+ break;
+
+ case LEAD_REBUILD1_STATE:
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ break;
+
+ case LEAD_REBUILD2_STATE:
+ outBuffer[0] = DHT_UPDATE_CMD;
+ write2(&outBuffer[1], numHosts);
+ write2(&outBuffer[3], numBlocks);
+ memcpy(&outBuffer[5], hostArray, numHosts
+ * sizeof(struct hostData));
+ memcpy(&outBuffer[5+numHosts*sizeof(struct hostData)],
+ blockOwnerArray, numBlocks*2);
+ udpSendAll(outBuffer, 5 + sizeof(struct hostData) * numHosts
+ + 2 * numBlocks);
+ break;
+
+ case LEAD_REBUILD3_STATE:
+ outBuffer[0] = FILL_DHT_CMD;
+ udpSendAll(outBuffer, 1);
+ break;
+
+ case LEAD_REBUILD4_STATE:
+ outBuffer[0] = RESUME_NORMAL_CMD;
+ udpSendAll(outBuffer, 1);
+ break;
+
+ case EXIT1_STATE: //TODO...
+ break;
+
+ case NORMAL_STATE:
+ case LEAD_NORMAL1_STATE:
+ case LEAD_NORMAL2_STATE:
+ case REBUILD2_STATE:
+ case REBUILD3_STATE:
+ case REBUILD4_STATE:
+ case EXIT2_STATE: //we shouldn't get here
+ break;
+ }
+ } else
+ {
+ dhtLog("udpListen(): timed out in state %s after %d retries\n",
+ state_names[state], timeoutCntr);
+ switch (state) {
+ case INIT1_STATE:
+ setState(EXIT2_STATE);
+ break;
+
+ case LEAD_NORMAL2_STATE:
+ setState(LEAD_REBUILD1_STATE);
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ break;
+
+ case ELECT1_STATE:
+ dhtLog("removing unresponsive hosts, before:\n");
+ writeHostList();
+ removeUnresponsiveHosts();
+ dhtLog("after\n");
+ writeHostList();
+ setState(ELECT2_STATE);
+ if (electionOriginator == myHostData.ipAddr) {
+ leader = hostArray[0].ipAddr;
+ if (leader == myHostData.ipAddr) { //I am the leader
+ dhtLog("I am the leader!\n");
+ setState(LEAD_REBUILD1_STATE);
+ outBuffer[0] = REBUILD_CMD;
+ udpSendAll(outBuffer, 1);
+ } else
+ { //notify leader
+ outBuffer[0] = CONGRATS_CMD;
+ write2(&outBuffer[1], numHosts);
+ memcpy(&outBuffer[3], hostArray, sizeof(struct hostData)
+ * numHosts);
+ udpSend(outBuffer, 3 + sizeof(struct hostData) * numHosts,
+ leader);
+ }
+ } else
+ {
+ outBuffer[0] = ELECT_LEADER_RES;
+ outBuffer[1] = 0;
+ write2(&outBuffer[2], numHosts);
+ memcpy(&outBuffer[4], hostArray, sizeof(struct hostData)
+ * numHosts);
+ udpSend(outBuffer, 4 + sizeof(struct hostData) * numHosts,
+ electionParent);
+ }
+ break;
+
+ case INIT2_STATE:
+ case ELECT2_STATE:
+ case REBUILD0_STATE:
+ case REBUILD1_STATE:
+ case REBUILD2_STATE:
+ case REBUILD3_STATE:
+ case REBUILD4_STATE:
+ case REBUILD5_STATE:
+ case LEAD_REBUILD1_STATE:
+ case LEAD_REBUILD2_STATE:
+ case LEAD_REBUILD3_STATE:
+ case LEAD_REBUILD4_STATE:
+ //start election
+ electionOriginator = myHostData.ipAddr;
+ setState(ELECT1_STATE);
+ outBuffer[0] = ELECT_LEADER_CMD;
+ write4(&outBuffer[1], myHostData.ipAddr); //originator = me
+ udpSendAll(outBuffer, 5);
+ break;
+
+ case EXIT1_STATE:
+ setState(EXIT2_STATE);
+ break;
+
+ case NORMAL_STATE:
+ case LEAD_NORMAL1_STATE:
+ case EXIT2_STATE: //we shouldn't get here
+ break;
+ }
+ }
+ }
+ }
+ if (state != oldState)
+ pthread_cond_broadcast(&stateCond);
+ pthread_mutex_unlock(&stateMutex);
+ }
+}
+
--- /dev/null
+#ifndef _DHT_H
+#define _DHT_H
+
+#include <stdio.h>
+
+/*******************************************************************************
+* Constants
+*******************************************************************************/
+
+//NOTE(review): presumably the value to pass as maxKeyCapacity when a host puts
+// no limit on the number of keys it will hold -- confirm against dht.c
+#define DHT_NO_KEY_LIMIT 0xFFFFFFFF
+
+/*******************************************************************************
+* Interface Function Prototypes
+*******************************************************************************/
+
+//called by host which joins (or starts) the system
+// seedIp: NOTE(review): presumably the host that is asked who the leader is
+// when joining -- confirm against dht.c
+// maxKeyCapacity: key capacity this host advertises for itself
+void dhtInit(unsigned int seedIp, unsigned int maxKeyCapacity);
+//exit system, cleanup
+void dhtExit();
+
+//called by whoever performs the creation, move, deletion
+
+//returns 0 if successful, -1 if an error occurred
+int dhtInsert(unsigned int key, unsigned int val);
+//simultaneously inserts the key-val pairs in the given arrays
+// (keys[i] is paired with vals[i]; both arrays hold numKeys entries)
+int dhtInsertMult(unsigned int numKeys, unsigned int *keys, unsigned int *vals);
+//returns 0 if successful, -1 if an error occurred
+int dhtRemove(unsigned int key);
+//simultaneously delete the keys in the given array (numKeys entries)
+int dhtRemoveMult(unsigned int numKeys, unsigned int *keys);
+//returns 0 if successful and copies val into *val,
+// 1 if key not found, -1 if an error occurred
+int dhtSearch(unsigned int key, unsigned int *val);
+//simultaneously search for the vals that correspond to the given keys.
+// result is placed in vals[]
+int dhtSearchMult(unsigned int numKeys, unsigned int *keys, unsigned int *vals);
+#endif
+
--- /dev/null
+#ifndef _DSMDEBUG_H_
+#define _DSMDEBUG_H_
+
+#include <sys/time.h>
+
+/*
+ * Debug print helpers. Every TABORTn macro printf()s its arguments in a fixed
+ * format and then flushes stdout. Despite the name, nothing in these
+ * expansions terminates the program; they only print.
+ * The expansions use printf()/fflush() while this header includes only
+ * <sys/time.h>, so the including file has to provide <stdio.h>.
+ * Each macro expands to a brace block, not to an expression.
+ */
+#define TABORT1(s) {printf("%s\n", s); fflush(stdout);}
+#define TABORT2(s, msg) {printf("%s(): %s\n", s, msg); fflush(stdout);}
+#define TABORT3(func, s, msg, d) {printf("%s(): %s: for %s = %d\n", func, s, msg, d); fflush(stdout);}
+#define TABORT4(s, d) {printf("%s = %d\n", s, d); fflush(stdout);}
+#define TABORT5(func, msg1, msg2, val1, val2) {printf("%s(): %s = %x, %s = %d\n", func, msg1, val1, msg2, val2); fflush(stdout);}
+#define TABORT6(a, b, c, val1, val2) {printf("%s = %x, %s for %s = %x\n", a, val1, b, c, val2); fflush(stdout);}
+#define TABORT7(func, a, b, c, val1, val2) {printf("%s(): %s for %s =%d, %s = %x\n", func, a, b, val1, c, val2); fflush(stdout);}
+#define TABORT8(func, s, d) {printf("%s(): %s = %d\n", func, s, d); fflush(stdout);}
+#define TABORT9(func, a, b, c, d, val1, val2, val3) {printf("%s(): %s for %s =%x, %s = %d, %s = %x\n", func, a, b, val1, c, val2, d, val3); fflush(stdout);}
+
+/* NOTE(review): not used inside this header; its purpose is not visible here. */
+#define ARRAY_SIZE 10100
+/*
+ * Increment count, then store the current gettimeofday() reading into start
+ * as seconds plus a microsecond fraction (tv_sec + tv_usec/1000000.0).
+ * start should be a floating-point lvalue or the fraction is lost.
+ */
+#define GETSTARTDELAY(start, count) { \
+ struct timeval tv; \
+ count++; \
+ gettimeofday(&tv, NULL); \
+ start = tv.tv_sec+(tv.tv_usec/1000000.0); \
+}
+
+/* Same as GETSTARTDELAY but without touching a counter. */
+#define GETSTART(start) { \
+ struct timeval tv; \
+ gettimeofday(&tv, NULL); \
+ start = tv.tv_sec+(tv.tv_usec/1000000.0); \
+}
+
+/*
+ * Store the current gettimeofday() reading (in seconds) into end and the
+ * elapsed interval end-start into time.
+ */
+#define GETENDDELAY(start, end, time) { \
+ struct timeval tv; \
+ gettimeofday(&tv, NULL); \
+ end = tv.tv_sec+(tv.tv_usec/1000000.0); \
+ time = (end-start); \
+}
+
+#endif
--- /dev/null
+#include "dsmlock.h"
+#include <stdio.h>
+
+/*
+ * Put the lock word at addr into its initial state by storing RW_LOCK_BIAS,
+ * the same value dsmlock.h uses for RW_LOCK_UNLOCKED.
+ */
+inline void initdsmlocks(volatile unsigned int *addr) {
+  const unsigned int unlocked = RW_LOCK_BIAS;
+
+  *addr = unlocked;
+}
+
+
+/*
+ * Take a reader slot by subtracting 1 from the lock word at addr.
+ * NOTE(review): the subl has no lock prefix, and "jns 1f" targets the label
+ * on the very next line, so execution continues at the same place whether or
+ * not the result went negative. This function therefore never waits, retries
+ * or reports failure -- confirm that is what callers expect
+ * (read_trylock() is the variant that returns a status).
+ */
+inline void readLock(volatile unsigned int *addr) {
+ __asm__ __volatile__ ("" " subl $1,(%0)\n\t"
+ "jns 1f\n"
+ "1:\n"
+ :: "a" (addr) : "memory");
+}
+
+/*
+ * Take the writer side by subtracting RW_LOCK_BIAS from the lock word at addr.
+ * NOTE(review): the subl has no lock prefix, and "jz 1f" targets the label on
+ * the very next line, so execution continues at the same place whether or not
+ * the result is zero. This function therefore never waits, retries or reports
+ * failure -- confirm that is what callers expect
+ * (write_trylock() is the variant that returns a status).
+ */
+inline void writeLock(volatile unsigned int *addr) {
+ __asm__ __volatile__ ("" " subl %1,(%0)\n\t"
+ "jz 1f\n"
+ "1:\n"
+ :: "a" (addr), "i" (RW_LOCK_BIAS) : "memory");
+}
+
+/*
+ * atomic_dec - decrement v->counter by one with a lock-prefixed decl.
+ * @v: pointer of type atomic_t
+ */
+static inline void atomic_dec(atomic_t *v) {
+ __asm__ __volatile__ (LOCK_PREFIX "decl %0"
+ : "+m" (v->counter));
+}
+
+/*
+ * atomic_inc - increment v->counter by one with a lock-prefixed incl.
+ * @v: pointer of type atomic_t
+ */
+static inline void atomic_inc(atomic_t *v) {
+ __asm__ __volatile__ (LOCK_PREFIX "incl %0"
+ : "+m" (v->counter));
+}
+
+/*
+ * atomic_sub_and_test - subtract and report whether the result is zero
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Subtracts @i from v->counter with a lock-prefixed subl. The following sete
+ * captures the zero flag, so the return value is 1 when the counter became
+ * exactly zero and 0 otherwise.
+ */
+static inline int atomic_sub_and_test(int i, atomic_t *v) {
+ unsigned char c;
+
+ __asm__ __volatile__ (LOCK_PREFIX "subl %2,%0; sete %1"
+ : "+m" (v->counter), "=qm" (c)
+ : "ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v using a lock-prefixed addl on v->counter.
+ * Nothing is returned.
+ */
+static inline void atomic_add(int i, atomic_t *v) {
+ __asm__ __volatile__ (LOCK_PREFIX "addl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i));
+}
+
+/*
+ * Try to take a read lock without waiting.
+ * @lock: lock word, initialised to RW_LOCK_BIAS by initdsmlocks()
+ *
+ * Decrements the lock word. If the value is still non-negative the read lock
+ * is held and 1 is returned; otherwise the decrement is undone and 0 is
+ * returned.
+ */
+inline int read_trylock(volatile unsigned int *lock) {
+  atomic_t *count = (atomic_t *)lock;
+
+  atomic_dec(count);
+  //atomic_t.counter is unsigned, so the value has to be viewed as a signed
+  //int here: an unsigned ">= 0" test is always true and would let the
+  //function report success even when the word has dropped below zero
+  if ((int)atomic_read(count) >= 0)
+    return 1; //can acquire a new read lock
+  atomic_inc(count); //undo our decrement
+  return 0; //failure
+}
+
+/*
+ * Attempt to take the write lock without waiting.
+ * Subtracts RW_LOCK_BIAS from the lock word; the attempt succeeds only when
+ * that leaves the word at exactly zero. On failure the bias is added back.
+ * Returns 1 when the write lock was obtained, 0 otherwise.
+ */
+inline int write_trylock(volatile unsigned int *lock) {
+  atomic_t *word = (atomic_t *)lock;
+
+  if (!atomic_sub_and_test(RW_LOCK_BIAS, word)) {
+    //the word was not free: undo our subtraction and report failure
+    atomic_add(RW_LOCK_BIAS, word);
+    return 0;
+  }
+  return 1; //got the write lock
+}
+
+/*
+ * Release a read lock: lock-prefixed increment of the lock word *rw, which
+ * gives back the 1 that taking the read lock subtracted.
+ */
+inline void read_unlock(volatile unsigned int *rw) {
+ __asm__ __volatile__ (LOCK_PREFIX "incl %0" : "+m" (*rw) : : "memory");
+}
+
+/*
+ * Release the write lock: lock-prefixed add of RW_LOCK_BIAS to the lock word
+ * *rw, which gives back the bias that taking the write lock subtracted.
+ */
+inline void write_unlock(volatile unsigned int *rw) {
+ __asm__ __volatile__ (LOCK_PREFIX "addl %1, %0"
+ : "+m" (*rw) : "i" (RW_LOCK_BIAS) : "memory");
+}
--- /dev/null
+#ifndef _DSMLOCK_H_
+#define _DSMLOCK_H_
+
+/* Value of a lock word that nobody holds (see RW_LOCK_UNLOCKED below). */
+#define RW_LOCK_BIAS 0x01000000
+/* Plain read of the counter field; no barrier or lock prefix is involved. */
+#define atomic_read(v) ((v)->counter)
+/* Brace initializer that sets a one-member aggregate to RW_LOCK_BIAS. */
+#define RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
+//#define LOCK_PREFIX ""
+/*
+ * Assembler text placed in front of an instruction: it records the address of
+ * local label 661 in the .smp_locks section and then emits "lock; " so the
+ * instruction that follows carries the x86 lock prefix.
+ */
+#define LOCK_PREFIX \
+ ".section .smp_locks,\"a\"\n" \
+ " .align 4\n" \
+ " .long 661f\n" /* address */\
+ ".previous\n" \
+ "661:\n\tlock; "
+
+
+
+/*
+ * Counter wrapper used by the atomic_* helpers.
+ * NOTE(review): counter is unsigned, so code that needs to detect a value
+ * below zero through atomic_read() must cast to int before comparing.
+ */
+typedef struct {
+ unsigned int counter;
+} atomic_t;
+
+void initdsmlocks(volatile unsigned int *addr);
+void readLock(volatile unsigned int *addr);
+void writeLock(volatile unsigned int *addr);
+int read_trylock(volatile unsigned int *lock);
+int write_trylock(volatile unsigned int *lock);
+/*
+ * NOTE(review): the four declarations below are static, so each file that
+ * includes this header gets its own internal-linkage declaration and needs
+ * its own definition to call them.
+ */
+static void atomic_dec(atomic_t *v);
+static void atomic_inc(atomic_t *v);
+static void atomic_add(int i, atomic_t *v);
+static int atomic_sub_and_test(int i, atomic_t *v);
+void read_unlock(volatile unsigned int *rw);
+void write_unlock(volatile unsigned int *rw);
+#endif
--- /dev/null
+#include "dstm.h"
+
+extern int classsize[];
+
+/* BEGIN object header */
+
+
+/* END object header */
+
--- /dev/null
+#ifndef _DSTM_H_
+#define _DSTM_H_
+
+/*
+ * When building with MAC defined, MSG_NOSIGNAL is defined as 0 so it adds no
+ * bits wherever it is OR-ed into a flags argument.
+ * NOTE(review): presumably because that platform's headers do not provide
+ * MSG_NOSIGNAL -- confirm.
+ */
+#ifdef MAC
+#define MSG_NOSIGNAL 0
+#endif
+
+/***********************************************************
+ * Macros
+ **********************************************************/
+/*
+ * Accessors into a packed buffer whose layout, as implied by the offsets, is:
+ *   int siteid | int ntuples | n x unsigned int (oids) | n x short | shorts...
+ * x is the start of the buffer and n the number of oid entries.
+ * The offsets are in bytes, so this assumes x is a byte pointer (char *) --
+ * TODO confirm at the call sites.
+ * Arguments are parenthesized so that expression arguments such as
+ * GET_PTR_EOFF(buf, k+1) scale the whole expression, not just its last term.
+ */
+#define GET_SITEID(x) ((int *)(x))
+#define GET_NTUPLES(x) ((int *)((x) + sizeof(int)))
+#define GET_PTR_OID(x) ((unsigned int *)((x) + 2*sizeof(int)))
+#define GET_PTR_EOFF(x,n) ((short *)((x) + 2*sizeof(int) + ((n)*sizeof(unsigned int))))
+#define GET_PTR_ARRYFLD(x,n) ((short *)((x) + 2*sizeof(int) + ((n)*sizeof(unsigned int)) + ((n)*sizeof(short))))
+/* Trace helpers: print "Inside s()" / "Outside s()" and flush stdout. */
+#define ENDEBUG(s) { printf("Inside %s()\n", s); fflush(stdout);}
+#define EXDEBUG(s) {printf("Outside %s()\n", s); fflush(stdout);}
+/*****************************************
+ * Coordinator Messages
+ ***************************************/
+/* Codes 1-9. All message codes in this header share one number space. */
+#define READ_REQUEST 1
+#define READ_MULT_REQUEST 2
+#define MOVE_REQUEST 3
+#define MOVE_MULT_REQUEST 4
+#define TRANS_REQUEST 5
+#define TRANS_ABORT 6
+#define TRANS_COMMIT 7
+#define TRANS_PREFETCH 8
+#define TRANS_ABORT_BUT_RETRY_COMMIT_WITH_RELOCATING 9
+/*********************************
+ * Participant Messages
+ *******************************/
+/* Codes 10-13 and 17-27; 14-16 are not assigned in this header. */
+#define OBJECT_FOUND 10
+#define OBJECT_NOT_FOUND 11
+#define OBJECTS_FOUND 12
+#define OBJECTS_NOT_FOUND 13
+#define TRANS_AGREE 17
+#define TRANS_DISAGREE 18
+#define TRANS_AGREE_BUT_MISSING_OBJECTS 19
+#define TRANS_SOFT_ABORT 20
+#define TRANS_SUCESSFUL 21
+#define TRANS_PREFETCH_RESPONSE 22
+#define START_REMOTE_THREAD 23
+#define THREAD_NOTIFY_REQUEST 24
+#define THREAD_NOTIFY_RESPONSE 25
+#define TRANS_UNSUCESSFUL 26
+#define CLOSE_CONNECTION 27
+/*******************************
+ * Duplication Messages
+ *****************************/
+/*
+ * Codes 30-37; 28-29 are not assigned in this header.
+ * REMOTE_RESTORE_DUPLICATED_STATE (37) is listed out of numeric order.
+ */
+#define RESPOND_LIVE 30
+#define LIVE 31
+#define REMOTE_RESTORE_DUPLICATED_STATE 37
+#define UPDATE_LIVE_HOSTS 32
+#define DUPLICATE_ORIGINAL 33
+#define DUPLICATE_BACKUP 34
+#define DUPLICATION_COMPLETE 35
+#define RECEIVE_DUPES 36
+/*********************************
+ * Paxos Messages
+ *******************************/
+/* Codes 40-47; 38-39 are not assigned in this header. */
+#define PAXOS_PREPARE 40
+#define PAXOS_PREPARE_REJECT 41
+#define PAXOS_PREPARE_OK 42
+#define PAXOS_ACCEPT 43
+#define PAXOS_ACCEPT_REJECT 44
+#define PAXOS_ACCEPT_OK 45
+#define PAXOS_LEARN 46
+#define DELETE_LEADER 47
+
+//Max number of objects
+#define MAX_OBJECTS 20
+//Transaction id per machine
+// (also the length of fixed_data_t.trans_id declared later in this header)
+#define TID_LEN 20
+//Port numbers
+// NOTE(review): presumably the TCP listen port and the UDP port -- confirm
+// where they are bound
+#define LISTEN_PORT 2156
+#define UDP_PORT 2158
+//Prefetch tuning parameters
+// The commented-out pair is the alternative setting for the other benchmarks;
+// only one pair may be active at a time.
+//#define RETRYINTERVAL 20 //N (For Em3d, SOR, Moldyn benchmarks)
+//#define SHUTDOWNINTERVAL 3 //M
+#define RETRYINTERVAL 100 //N (For MatrixMultiply, 2DFFT benchmarks)
+#define SHUTDOWNINTERVAL 1 //M
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include "clookup.h"
+#include "queue.h"
+#include "mcpileq.h"
+#include "threadnotify.h"
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include "sockpool.h"
+#include <signal.h>
+#include "plookup.h"
+#include "dsmdebug.h"
+#ifdef ABORTREADERS
+#include <setjmp.h>
+#endif
+
+//bit designations for status field of objheader
+// each value is a distinct single bit, so they can be combined with |
+#define DIRTY 0x01
+#define NEW 0x02
+#define LOCK 0x04
+#define LOCAL 0x08
+
+/*******Global statistics *********/
+extern int numprefetchsites;
+
+/*
+ * NOTE(review): the two variables below are defined (not declared extern) in
+ * a header, so every file that includes it carries its own tentative
+ * definition. Toolchains that do not merge common symbols report these as
+ * duplicate definitions at link time.
+ */
+double idForTimeDelay; /* TODO Remove, necessary to get time delay for starting transRequest for this id */
+int transCount; /* TODO Remove, necessary to the transaction id */
+
+/*
+ * Object header and its accessor macros, in two variants.
+ *
+ * With COMPILER defined, the header holds only notifylist/version/rcount/
+ * isBackup. The oid, status and type are read from the struct ___Object___
+ * located sizeof(objheader_t) bytes past the header address x.
+ * Without COMPILER, oid/type/status are fields of the header itself.
+ *
+ * NOTE(review): the COMPILER macros cast the pointer x to unsigned int before
+ * adding the header size, which truncates the address wherever pointers are
+ * wider than unsigned int. The macro arguments are also not parenthesized, so
+ * pass plain identifiers rather than expressions.
+ */
+#ifdef COMPILER
+
+#include "structdefs.h"
+
+typedef struct objheader {
+ threadlist_t *notifylist;
+ unsigned short version;
+ unsigned short rcount;
+ char isBackup;
+} objheader_t;
+
+/* oid of the object behind header x: stored in its ___nextobject___ field */
+#define OID(x) \
+ (*((unsigned int *)&((struct ___Object___ *)((unsigned int) x + sizeof(objheader_t)))->___nextobject___))
+
+/* x here is an object pointer (not a header); yields its oid as void*, or 0 when x is NULL */
+#define COMPOID(x) \
+ ((void*)((((void *) x )!=NULL) ? (*((unsigned int *)&((struct ___Object___ *) x)->___nextobject___)) : 0))
+
+/* status word of the object behind header x: stored in its ___localcopy___ field */
+#define STATUS(x) \
+ *((unsigned int *) &(((struct ___Object___ *)((unsigned int) x + sizeof(objheader_t)))->___localcopy___))
+
+/* address of that same status word */
+#define STATUSPTR(x) \
+ ((unsigned int *) &(((struct ___Object___ *)((unsigned int) x + sizeof(objheader_t)))->___localcopy___))
+
+/* type field of the object behind header x */
+#define TYPE(x) \
+ ((struct ___Object___ *)((unsigned int) x + sizeof(objheader_t)))->type
+
+/*
+ * Store the object's size into size. Types below NUMCLASSES use
+ * classsize[type] directly; other types are treated as arrays and use
+ * classsize[type] * ___length___ + sizeof(struct ArrayObject).
+ * Expands to a brace block.
+ */
+#define GETSIZE(size, x) { \
+ int type=TYPE(x); \
+ if (type<NUMCLASSES) { \
+ size=classsize[type]; \
+ } else { \
+ size=classsize[type]*((struct ArrayObject *)&((objheader_t *)x)[1])->___length___+sizeof(struct ArrayObject); \
+ } \
+}
+
+#else
+
+typedef struct objheader {
+ threadlist_t *notifylist;
+ unsigned int oid;
+ unsigned short type;
+ unsigned short version;
+ unsigned short rcount;
+ char status;
+} objheader_t;
+
+/* Direct field accessors; x must be a pointer to objheader_t. */
+#define OID(x) x->oid
+#define TYPE(x) x->type
+#define STATUS(x) x->status
+#define STATUSPTR(x) &x->status
+/* Unlike the COMPILER variant this expands to a bare assignment, not a block. */
+#define GETSIZE(size, x) size=classsize[TYPE(x)]
+#endif
+
+/*
+ * Header of one object-store region. next and prev point at other objstr
+ * headers, linking the regions into a list.
+ */
+typedef struct objstr {
+ unsigned int size; //this many bytes are allocated after this header
+ void *top; //NOTE(review): presumably the first unused byte of this region -- confirm in the object store implementation
+ struct objstr *next;
+ struct objstr *prev;
+} objstr_t;
+
+/*
+ * Pairs one object id with one mid.
+ * NOTE(review): mid is presumably a machine id -- confirm against its users.
+ */
+typedef struct oidmidpair {
+ unsigned int oid;
+ unsigned int mid;
+} oidmidpair_t;
+
+// Shared structure that keeps track of responses from the participants
+typedef struct thread_response {
+ char rcv_status; // NOTE(review): presumably one of the message codes defined in this header -- confirm
+} thread_response_t;
+
+// Structure that holds fixed data to be sent along with TRANS_REQUEST
+// NOTE(review): presumably transmitted as raw bytes, in which case field order
+// and sizes must match on both ends -- confirm before changing the layout
+typedef struct fixed_data {
+ char control; /* control message */
+ char trans_id[TID_LEN]; /* transaction id */
+ int mcount; /* participant count */
+ unsigned int numread; /* no of objects read */
+ unsigned int nummod; /* no of objects modified */
+ unsigned int numcreated; /* no of objects created */
+ int sum_bytes; /* total bytes of modified objects in a transaction */
+} fixed_data_t;
+
+/* Structure that holds trans request information for each participant:
+ * the fixed part f followed by pointers to the variable-length arrays that
+ * go with it */
+typedef struct trans_req_data {
+ fixed_data_t f; /* Holds first few fixed bytes of data sent during TRANS_REQUEST protocol */
+ unsigned int *listmid; /* Pointer to array holding list of participants */
+ char *objread; /* Pointer to array holding oid and version number of objects that are only read */
+ unsigned int *oidmod; /* Pointer to array holding oids of objects that are modified */
+ unsigned int *oidcreated; /* Pointer to array holding oids of objects that are newly created */
+} trans_req_data_t;
+
+/* Structure that holds information of objects that are not found in the participant
+ * and objs locked within a transaction during commit process.
+ * Each num* field is the count that goes with the like-named obj* array. */
+typedef struct trans_commit_data {
+ unsigned int *objlocked; /* Pointer to array holding oids of objects locked inside a transaction */
+ unsigned int *objnotfound; /* Pointer to array holding oids of objects not found on the participant machine */
+ unsigned int *objvernotmatch; /* Pointer to array holding oids whose version doesn't match on the participant machine */
+ void *modptr; /* Pointer to the address in the mainobject store of the participant that holds all modified objects */
+ int numlocked; /* no of objects locked */
+ int numnotfound; /* no of objects not found */
+ int numvernotmatch; /* no of objects whose version doesn't match */
+} trans_commit_data_t;
+
+
+/*
+ * Debug helper: prints PTR->mid in hex and PTR->thread_id in decimal.
+ * PTR is parenthesized so that pointer expressions such as &tid work.
+ * The expansion already ends in ';', as it always has.
+ */
+#define PRINT_TID(PTR) printf("DEBUG -> %x %d\n", (PTR)->mid, (PTR)->thread_id);
+
+/* Initialize main object store and lookup tables, start server thread. */
+int dstmInit(void);
+void send_data(int fd, void *buf, int buflen);
+void recv_data(int fd, void *buf, int buflen);
+int recv_data_errorcode(int fd, void *buf, int buflen);
+
+/* Prototypes for object header */
+unsigned int getNewOID(void);
+/* end object header */
+
+/* Prototypes for object store */
+objstr_t *objstrCreate(unsigned int size); //size in bytes
+void objstrDelete(objstr_t *store); //traverse and free entire list
+void *objstrAlloc(objstr_t **store, unsigned int size); //size in bytes
+void clearObjStore(); // TODO:currently only clears the prefetch cache object store
+/* end object store */
+
+/* Prototypes for duplications */
+void updateLiveHosts();
+int updateLiveHostsCommit();
+void setLocateObjHosts();
+void printHostsStatus();
+int allHostsLive();
+void duplicateLostObjects(unsigned int mid);
+void duplicateLocalBackupObjects();
+void duplicateLocalOriginalObjects();
+int readDuplicateObjs(int);
+void restoreDuplicationState(unsigned int deadHost);
+unsigned int getPrimaryMachine(unsigned int mid);
+unsigned int getBackupMachine(unsigned int mid);
+int getNumLiveHostsInSystem();
+unsigned int getNewTransID(void);
+/* end duplication */
+
+/* Prototypes for server portion */
+void *dstmListen(void *);
+int startlistening();
+void *dstmAccept(void *);
+int readClientReq(trans_commit_data_t *, int);
+int processClientReq(fixed_data_t *, trans_commit_data_t *,unsigned int *, char *, void *, unsigned int *, int);
+char handleTransReq(fixed_data_t *, trans_commit_data_t *, unsigned int *, char *, void *, int);
+char decideCtrlMessage(fixed_data_t *, trans_commit_data_t *, int *, int *, int *, int *, int *, void *, unsigned int *, unsigned int *, int);
+int transCommitProcess(void *, unsigned int *, unsigned int *, int, int, int);
+void processReqNotify(unsigned int numoid, unsigned int *oid, unsigned short *version, unsigned int mid, unsigned int threadid);
+void getCommitCountForObjMod(unsigned int *, unsigned int *, unsigned int *, int *,
+ int *, int *, int *, int *, int *, int *, char *, unsigned int, unsigned short);
+void getCommitCountForObjRead(unsigned int *, unsigned int *, unsigned int *, int *, int *, int *, int *, int *,
+ int *, int *, char *, unsigned int, unsigned short);
+/* end server portion */
+
+/* Prototypes for transactions */
+/* Function called at beginning. Passes in the first parameter. */
+/* Returns 1 if this thread should run the main process */
+
+int dstmStartup(const char *);
+void transInit();
+int processConfigFile();
+void addHost(unsigned int);
+void mapObjMethod(unsigned short);
+
+void randomdelay();
+void transStart();
+/*
+ * TRANSREAD(x, y): set x to the object whose oid is y.
+ *  - y == 0 gives x = NULL.
+ *  - Otherwise the chain starting at c_table[(y & c_mask) >> 1] is walked.
+ *    On a key match, x is set to the address just past the objheader_t stored
+ *    in the node's val, i.e. the object body.
+ *  - If the chain ends without a match, x is set to the result of
+ *    transRead2(y). The empty asm that follows lists c_table and c_mask as
+ *    memory outputs, so the compiler has to reload them afterwards.
+ *    NOTE(review): presumably because transRead2() can replace the table
+ *    although it is declared pure -- confirm.
+ * y is evaluated once (it is copied into inputvalue); x is only assigned.
+ */
+#define TRANSREAD(x,y) { \
+ unsigned int inputvalue;\
+if ((inputvalue=(unsigned int)y)==0) x=NULL;\
+else { \
+chashlistnode_t * cnodetmp=&c_table[(inputvalue&c_mask)>>1]; \
+do { \
+ if (cnodetmp->key==inputvalue) {x=(void *)&((objheader_t*)cnodetmp->val)[1];break;} \
+cnodetmp=cnodetmp->next;\
+ if (cnodetmp==NULL) {x=(void *)transRead2(inputvalue); asm volatile("":"=m"(c_table),"=m"(c_mask));break;} \
+} while(1);\
+}}
+
+__attribute__((pure)) objheader_t *transRead(unsigned int);
+__attribute__((pure)) objheader_t *transRead2(unsigned int);
+objheader_t *transCreateObj(unsigned int); //returns oid header
+unsigned int locateBackupMachine(unsigned int oid);
+int transCommit(); //return 0 if successful
+void *transRequest(void *); //the C routine that the thread will execute when TRANS_REQUEST begins
+char decideResponse(char *, char *, int); // Coordinator decides what response to send to the participant
+void *getRemoteObj(unsigned int, unsigned int); // returns object header from main object store after object is copied into it from remote machine
+void handleLocalReq(trans_req_data_t *, trans_commit_data_t *, char *);
+int transComProcess(trans_req_data_t *, trans_commit_data_t *);
+void doLocalProcess(char, trans_req_data_t *tdata, trans_commit_data_t *);
+int transAbortProcess(trans_commit_data_t *);
+void transAbort();
+void sendPrefetchResponse(int sd, char *control, char *sendbuffer, int *size);
+void prefetch(int, int, unsigned int *, unsigned short *, short*);
+void *transPrefetch(void *);
+void *mcqProcess(void *);
+prefetchpile_t *foundLocal(char *); // returns node with prefetch elements(oids, offsets)
+int lookupObject(unsigned int * oid, short offset);
+int checkoid(unsigned int oid);
+int transPrefetchProcess(int **, short);
+void sendPrefetchReq(prefetchpile_t*, int);
+void sendPrefetchReqnew(prefetchpile_t*, int);
+int getPrefetchResponse(int);
+unsigned short getObjType(unsigned int oid);
+int startRemoteThread(unsigned int oid, unsigned int mid);
+plistnode_t *pInsert(plistnode_t *pile, objheader_t *headeraddr, unsigned int mid, int num_objs);
+void commitCountForObjRead(char *, unsigned int *, unsigned int *, int *, int *, int *, int *, int *, unsigned int, unsigned short);
+void commitCountForObjMod(char *, unsigned int *, unsigned int *, int *, int *, int *, int *, int *, unsigned int, unsigned short);
+
+/* Sends notification request for thread join; returns 0 if successful, else -1 */
+int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid);
+void threadNotify(unsigned int oid, unsigned short version, unsigned int tid);
+int notifyAll(threadlist_t **head, unsigned int oid, unsigned int version);
+
+/* Paxos algorithm */
+int paxos();
+int paxosPrepare();
+int paxosAccept();
+void paxosLearn();
+
+/* Internal functions from signal.c */
+int getthreadid();
+double getMax(double *array, int size);
+double getMin(double *array, int size);
+double getfast(int siteid, int threadid);
+double getslowest(int siteid, int threadid);
+double getavg(int siteid, int threadid);
+double getavgperthd(int siteid, int threadid);
+double avgfast(int siteid, int threadid);
+double avgslow(int siteid, int threadid);
+void bubblesort();
+void swap(double *e1, double *e2);
+double avgofthreads(int siteid, int threadid);
+
+/* end transactions */
+
+#include "trans.h"
+#endif
--- /dev/null
+/* Coordinator => Machine that initiates the transaction request call for committing a transaction
+ * Participant => Machines that host the objects involved in a transaction commit */
+
+#include <netinet/tcp.h>
+#include <ip.h>
+#include "dstm.h"
+#include "mlookup.h"
+#include "llookup.h"
+#include "threadnotify.h"
+#include "prefetch.h"
+#include <sched.h>
+#ifdef COMPILER
+#include "thread.h"
+#endif
+#include "gCollect.h"
+
+#ifdef RECOVERY
+#include <unistd.h>
+#include <signal.h>
+#endif
+
+#define BACKLOG 10 //max pending connections
+#define RECEIVE_BUFFER_SIZE 2048
+
+/* Defined in other translation units */
+extern int classsize[];
+extern int numHostsInSystem;
+extern pthread_mutex_t notifymutex;
+
+/* Host liveness and object-location tables; the UPDATE_LIVE_HOSTS handler in
+ * dstmAccept() overwrites both while holding liveHosts_mutex */
+extern int *liveHosts;
+extern unsigned int *locateObjHosts;
+pthread_mutex_t liveHosts_mutex;
+/* Guards the leaderFixing flag (see REMOTE_RESTORE_DUPLICATED_STATE in dstmAccept()) */
+pthread_mutex_t leaderFixing_mutex;
+
+extern int liveHostsValid;
+extern int numLiveHostsInSystem;
+/* NOTE(review): timeoutFlag is declared twice below; the second declaration is redundant */
+extern __thread int timeoutFlag;
+extern __thread int timeoutFlag;
+/* Counts READ_REQUEST replies; dstmAccept() pretends to fail once it reaches 1000 */
+int testcount = 0;
+
+objstr_t *mainobjstore;
+pthread_mutex_t mainobjstore_mutex;
+pthread_mutex_t lockObjHeader;
+pthread_mutexattr_t mainobjstore_mutex_attr; /* Attribute for lock to make it a recursive lock */
+
+sockPoolHashTable_t *transPResponseSocketPool;
+extern sockPoolHashTable_t *transRequestSockPool;
+
+/* When non-zero, dstmListen() and dstmAccept() stop servicing requests and loop forever */
+int failFlag = 0; //debug
+/* Non-zero while one dstmAccept() thread is running the live-host fix-up */
+int leaderFixing;
+
+/******************************
+ * Global variables for Paxos
+ ******************************/
+extern int n_a;
+extern unsigned int v_a;
+extern int n_h;
+extern int my_n;
+extern int leader;
+extern int paxosRound;
+/* Initializes the main object store, the mutexes defined in this file and the
+ * machine (mhash), location (lhash) and notification (notifyhash) lookup
+ * tables, then creates transPResponseSocketPool.
+ * Returns 0 on success and 1 on failure. */
+int dstmInit(void) {
+  mainobjstore = objstrCreate(DEFAULT_OBJ_STORE_SIZE);
+
+  /* mainobjstore_mutex is created with the recursive mutex attribute */
+  pthread_mutexattr_init(&mainobjstore_mutex_attr);
+  pthread_mutexattr_settype(&mainobjstore_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
+  pthread_mutex_init(&mainobjstore_mutex, &mainobjstore_mutex_attr);
+  pthread_mutex_init(&lockObjHeader, NULL);
+
+  pthread_mutex_init(&liveHosts_mutex, NULL);
+  pthread_mutex_init(&leaderFixing_mutex, NULL);
+
+  if (mhashCreate(MHASH_SIZE, MLOADFACTOR))
+    return 1; //failure
+
+  if (lhashCreate(HASH_SIZE, LOADFACTOR))
+    return 1; //failure
+
+  if (notifyhashCreate(N_HASH_SIZE, N_LOADFACTOR))
+    return 1; //failure
+
+  //Initialize socket pool
+  transPResponseSocketPool = createSockPool(transPResponseSocketPool, DEFAULTSOCKPOOLSIZE);
+  if (transPResponseSocketPool == NULL) {
+    printf("Error in creating new socket pool at %s line %d\n", __FILE__, __LINE__);
+    /* Previously returned 0 here, which reported success to the caller
+     * even though the pool is unusable */
+    return 1; //failure
+  }
+
+  return 0;
+}
+
+/* Creates a TCP socket bound to LISTEN_PORT on all local addresses and puts it
+ * into the listening state with a backlog of BACKLOG.
+ * Returns the listening descriptor; any failure is reported with perror()
+ * and terminates the process with exit(1). */
+int startlistening() {
+  int listenfd;
+  struct sockaddr_in my_addr;
+  socklen_t addrlength = sizeof(struct sockaddr);
+  int setsockflag=1;
+
+  listenfd = socket(AF_INET, SOCK_STREAM, 0);
+  if (listenfd == -1) {
+    perror("socket");
+    exit(1);
+  }
+
+  /* The perror labels below name the call that actually failed; they used
+   * to say "socket", which pointed at the wrong call when diagnosing */
+  if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &setsockflag, sizeof (setsockflag)) < 0) {
+    perror("setsockopt(SO_REUSEADDR)");
+    exit(1);
+  }
+#ifdef MAC
+  if (setsockopt(listenfd, SOL_SOCKET, SO_NOSIGPIPE, &setsockflag, sizeof (setsockflag)) < 0) {
+    perror("setsockopt(SO_NOSIGPIPE)");
+    exit(1);
+  }
+#endif
+
+  my_addr.sin_family = AF_INET;
+  my_addr.sin_port = htons(LISTEN_PORT);
+  my_addr.sin_addr.s_addr = INADDR_ANY;
+  memset(&(my_addr.sin_zero), '\0', sizeof(my_addr.sin_zero));
+
+  if (bind(listenfd, (struct sockaddr *)&my_addr, addrlength) == -1) {
+    perror("bind");
+    exit(1);
+  }
+
+  if (listen(listenfd, BACKLOG) == -1) {
+    perror("listen");
+    exit(1);
+  }
+  return listenfd;
+}
+
+/* Thread entry point that accepts incoming connections for transaction calls.
+ * lfd carries the listening descriptor cast to a pointer. For every accepted
+ * connection TCP_NODELAY is set and a detached dstmAccept() thread is started
+ * to service it. The loop never returns. */
+void *dstmListen(void *lfd) {
+  int listenfd=(int)lfd;
+  int acceptfd;
+  struct sockaddr_in client_addr;
+  socklen_t addrlength;
+  pthread_t thread_dstm_accept;
+
+  printf("Listening on port %d, fd = %d\n", LISTEN_PORT, listenfd);
+  while(1) {
+    int retval;
+    int flag=1;
+    if(failFlag) while(1);  /* debug: simulated failure, stop accepting */
+
+    /* accept() treats the length as an in/out argument, so reset it each time */
+    addrlength = sizeof(struct sockaddr);
+    acceptfd = accept(listenfd, (struct sockaddr *)&client_addr, &addrlength);
+    if (acceptfd < 0) {
+      /* Previously the error descriptor was handed to setsockopt() and to
+       * a new dstmAccept() thread; report it and wait for the next client */
+      perror("accept");
+      continue;
+    }
+    setsockopt(acceptfd, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(flag));
+
+    /* Keep retrying until the handler thread is created */
+    do {
+      retval=pthread_create(&thread_dstm_accept, NULL, dstmAccept, (void *)acceptfd);
+    } while(retval!=0);
+    pthread_detach(thread_dstm_accept);
+  }
+}
+/* Per-connection handler thread (started detached by dstmListen()).
+ * acceptfd carries the connected socket descriptor cast to a pointer.
+ * The function repeatedly reads a one-byte control code from the socket and
+ * dispatches on it until recv_data_errorcode() returns 0 or a
+ * CLOSE_CONNECTION code arrives; it then closes the socket and ends the
+ * thread with pthread_exit(NULL). Several error paths terminate the thread
+ * or the whole process directly instead. */
+void *dstmAccept(void *acceptfd) {
+ int val, retval, size, sum, sockid, sd = 0;
+ unsigned int oid;
+ char *buffer;
+ char control,ctrl, response;
+ char *ptr;
+ void *srcObj;
+ void *dupeptr;
+ int i, tempsize;
+ objheader_t *h;
+ trans_commit_data_t transinfo;
+ unsigned short objType, *versionarry, version;
+ unsigned int *oidarry, numoid, mid, threadid;
+ int n, v;
+
+ printf("%s-> Entering dstmAccept\n", __func__); fflush(stdout);
+ /* Receive control messages from other machines */
+ while(1) {
+ int ret=recv_data_errorcode((int)acceptfd, &control, sizeof(char));
+ /* if(timeoutFlag || timeoutFlag) {
+ //is there any way to force a context switch?
+ printf("recv_data_errorcode: exiting, timeoutFlag:%d, timeoutFlag:%d\n", failedMachineFlag, timeoutFlag);
+ exit(0);
+ }*/
+ /* debug: once a failure is being simulated this thread stops servicing requests */
+ if(failFlag) {
+ while(1) {
+ sleep(10);
+ }
+ }
+
+ /* ret == 0 ends the loop and falls through to the close below */
+ if (ret==0)
+ break;
+ if (ret==-1) {
+ printf("DEBUG -> RECV Error!.. retrying\n");
+ /* NOTE(review): exit(0) terminates the process, so the break is unreachable
+ * and nothing is retried despite the message */
+ exit(0);
+ break;
+ }
+ printf("%s-> dstmAccept control = %d\n", __func__, (int)control);
+ switch(control) {
+ case READ_REQUEST:
+ /* Read oid requested and search if available */
+ recv_data((int)acceptfd, &oid, sizeof(unsigned int));
+ /* Yield until the object appears in the machine lookup table */
+ while((srcObj = mhashSearch(oid)) == NULL) {
+ int ret;
+ if((ret = sched_yield()) != 0) {
+ printf("%s(): error no %d in thread yield\n", __func__, errno);
+ }
+ }
+ h = (objheader_t *) srcObj;
+ GETSIZE(size, h);
+ size += sizeof(objheader_t);
+ sockid = (int) acceptfd;
+ /* NOTE(review): the loop above only exits with a non-NULL srcObj, and h was
+ * already passed to GETSIZE, so this OBJECT_NOT_FOUND branch looks unreachable */
+ if (h == NULL) {
+ ctrl = OBJECT_NOT_FOUND;
+ send_data(sockid, &ctrl, sizeof(char));
+ if(timeoutFlag || timeoutFlag) {
+ printf("send_data: remote machine dead, line:%d\n", __LINE__);
+ timeoutFlag = 0;
+ exit(1);
+ }
+ } else {
+ // Type
+ /* Reply header: OBJECT_FOUND byte followed by the int size written at msg[1] */
+ char msg[]={OBJECT_FOUND, 0, 0, 0, 0};
+ *((int *)&msg[1])=size;
+ printf("*****testcount:%d\n", testcount);
+ printf("oid:%u, h->version:%d\n", OID(h), h->version);
+ //if(OID(h) == 1 && ((h->version == 20 && liveHosts[0]) || (h->version == 15000 && liveHosts[2])))
+ /* debug: simulate a machine failure on the 1000th read reply */
+ if(testcount == 1000)
+ {
+ printf("Pretending to fail\n");
+ failFlag = 1;//sleep(5);
+ while(1) {
+ sleep(10);
+ }//exit(0);
+ }
+ else
+ testcount++;
+ send_data(sockid, &msg, sizeof(msg));
+ send_data(sockid, h, size);
+ if(timeoutFlag || timeoutFlag) {
+ printf("send_data: remote machine dead, line:%d\n", __LINE__);
+ timeoutFlag = 0;
+ exit(1);
+ }
+ }
+ break;
+
+ case READ_MULT_REQUEST:
+ break;
+
+ case MOVE_REQUEST:
+ break;
+
+ case MOVE_MULT_REQUEST:
+ break;
+
+ case TRANS_REQUEST:
+ /* Read transaction request */
+ transinfo.objlocked = NULL;
+ transinfo.objnotfound = NULL;
+ transinfo.modptr = NULL;
+ transinfo.numlocked = 0;
+ transinfo.numnotfound = 0;
+ /* A failed request ends this thread without closing acceptfd */
+ if((val = readClientReq(&transinfo, (int)acceptfd)) != 0) {
+ printf("Error: In readClientReq() %s, %d\n", __FILE__, __LINE__);
+ pthread_exit(NULL);
+ }
+ break;
+
+ case TRANS_PREFETCH:
+#ifdef RANGEPREFETCH
+ if((val = rangePrefetchReq((int)acceptfd)) != 0) {
+ printf("Error: In rangePrefetchReq() %s, %d\n", __FILE__, __LINE__);
+ break;
+ }
+#else
+ if((val = prefetchReq((int)acceptfd)) != 0) {
+ printf("Error: In prefetchReq() %s, %d\n", __FILE__, __LINE__);
+ break;
+ }
+#endif
+ break;
+
+ case TRANS_PREFETCH_RESPONSE:
+#ifdef RANGEPREFETCH
+ if((val = getRangePrefetchResponse((int)acceptfd)) != 0) {
+ printf("Error: In getRangePrefetchRespose() %s, %d\n", __FILE__, __LINE__);
+ break;
+ }
+#else
+ if((val = getPrefetchResponse((int) acceptfd)) != 0) {
+ printf("Error: In getPrefetchResponse() %s, %d\n", __FILE__, __LINE__);
+ break;
+ }
+#endif
+ break;
+
+ case START_REMOTE_THREAD:
+ recv_data((int)acceptfd, &oid, sizeof(unsigned int));
+ objType = getObjType(oid);
+ printf("%s-> Call startDSMthread\n", __func__);
+ startDSMthread(oid, objType);
+ printf("%s-> Finish startDSMthread\n", __func__);
+ break;
+
+ case THREAD_NOTIFY_REQUEST:
+ /* Payload after numoid: numoid oids, numoid versions, then mid and threadid */
+ recv_data((int)acceptfd, &numoid, sizeof(unsigned int));
+ size = (sizeof(unsigned int) + sizeof(unsigned short)) * numoid + 2 * sizeof(unsigned int);
+ if((buffer = calloc(1,size)) == NULL) {
+ printf("%s() Calloc error at %s, %d\n", __func__, __FILE__, __LINE__);
+ pthread_exit(NULL);
+ }
+
+ recv_data((int)acceptfd, buffer, size);
+
+ /* NOTE(review): oidarry and versionarry are not NULL-checked and are not
+ * freed in this function; presumably processReqNotify() takes ownership - confirm */
+ oidarry = calloc(numoid, sizeof(unsigned int));
+ memcpy(oidarry, buffer, sizeof(unsigned int) * numoid);
+ size = sizeof(unsigned int) * numoid;
+ versionarry = calloc(numoid, sizeof(unsigned short));
+ memcpy(versionarry, buffer+size, sizeof(unsigned short) * numoid);
+ size += sizeof(unsigned short) * numoid;
+ mid = *((unsigned int *)(buffer+size));
+ size += sizeof(unsigned int);
+ threadid = *((unsigned int *)(buffer+size));
+ processReqNotify(numoid, oidarry, versionarry, mid, threadid);
+ free(buffer);
+
+ break;
+
+ case THREAD_NOTIFY_RESPONSE:
+ /* Payload: oid (unsigned int), version (unsigned short), threadid (unsigned int) */
+ size = sizeof(unsigned short) + 2 * sizeof(unsigned int);
+ if((buffer = calloc(1,size)) == NULL) {
+ printf("%s() Calloc error at %s, %d\n", __func__, __FILE__, __LINE__);
+ pthread_exit(NULL);
+ }
+
+ recv_data((int)acceptfd, buffer, size);
+
+
+ oid = *((unsigned int *)buffer);
+ size = sizeof(unsigned int);
+ version = *((unsigned short *)(buffer+size));
+ size += sizeof(unsigned short);
+ threadid = *((unsigned int *)(buffer+size));
+ threadNotify(oid,version,threadid);
+ free(buffer);
+ break;
+
+ case CLOSE_CONNECTION:
+ goto closeconnection;
+
+ case RESPOND_LIVE:
+ /* Mark the cached live-host table stale and answer the probe with LIVE */
+ liveHostsValid = 0;
+ ctrl = LIVE;
+ send_data((int)acceptfd, &ctrl, sizeof(ctrl));
+ if(timeoutFlag) {
+ printf("send_data: remote machine dead, line:%d\n", __LINE__);
+ timeoutFlag = 0;
+ exit(1);
+ }
+ printf("%s (RESPOND_LIVE)-> Sending LIVE!\n", __func__);
+ break;
+
+ case REMOTE_RESTORE_DUPLICATED_STATE:
+ printf("%s (REMOTE_RESTORE_DUPLICATED_STATE)-> Starting process\n", __func__);
+ recv_data((int)acceptfd, &mid, sizeof(unsigned int));
+ /* DUPLICATION_COMPLETE is sent before any fixing work is done */
+ ctrl = DUPLICATION_COMPLETE;
+ send_data((int)acceptfd, &ctrl, sizeof(char));
+ /* Nothing to do if mid is already recorded as not live */
+ if(!liveHosts[findHost(mid)])
+ break;
+ //ctrl = LIVE;
+ //send_data((int)acceptfd, &ctrl, sizeof(char));
+ /* leaderFixing lets only one thread at a time run the fix-up below */
+ pthread_mutex_lock(&leaderFixing_mutex);
+ if(!leaderFixing) {
+ leaderFixing = 1;
+ pthread_mutex_unlock(&leaderFixing_mutex);
+ // begin fixing
+ updateLiveHosts();
+ if(!liveHosts[findHost(mid)]) { //confirmed dead
+ duplicateLostObjects(mid);
+ }
+ if(updateLiveHostsCommit() != 0) {
+ printf("error updateLiveHostsCommit()\n");
+ exit(1);
+ }
+ // finish fixing
+ pthread_mutex_lock(&leaderFixing_mutex);
+ leaderFixing = 0;
+ pthread_mutex_unlock(&leaderFixing_mutex);
+ //ctrl = DUPLICATION_COMPLETE;
+ //send_data((int)acceptfd, &ctrl, sizeof(char));
+ }
+ else {
+ pthread_mutex_unlock(&leaderFixing_mutex);
+ //while(leaderFixing);
+ }
+ break;
+
+ case UPDATE_LIVE_HOSTS:
+ // update livehosts.
+ printf("%s (UPDATE_LIVE_HOSTS)-> Attempt to update live machines\n", __func__);
+ // copy back
+ /* Both tables are overwritten in place from the socket while the lock is held */
+ pthread_mutex_lock(&liveHosts_mutex);
+ recv_data((int)acceptfd, liveHosts, sizeof(int)*numHostsInSystem);
+ recv_data((int)acceptfd, locateObjHosts, sizeof(unsigned int)*numHostsInSystem*2);
+ pthread_mutex_unlock(&liveHosts_mutex);
+ liveHostsValid = 1;
+ numLiveHostsInSystem = getNumLiveHostsInSystem();
+ printHostsStatus();
+ printf("%s (UPDATE_LIVE_HOSTS)-> Finished\n", __func__);
+ //exit(0);
+ break;
+
+ case DUPLICATE_ORIGINAL:
+ printf("%s (DUPLICATE_ORIGINAL)-> Attempt to duplicate original objects\n", __func__);
+ //object store stuffffff
+ /* mid is the machine that receives the duplicates over a pooled socket */
+ recv_data((int)acceptfd, &mid, sizeof(unsigned int));
+ tempsize = mhashGetDuplicate(&dupeptr, 0);
+
+ //send control and dupes after
+ ctrl = RECEIVE_DUPES;
+
+ /* NOTE(review): a negative sd is only logged; it is still used below */
+ if((sd = getSockWithLock(transRequestSockPool, mid)) < 0) {
+ printf("DUPLICATE_ORIGINAL: socket create error\n");
+ //usleep(1000);
+ }
+ printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
+
+ send_data(sd, &ctrl, sizeof(char));
+ send_data(sd, dupeptr, tempsize);
+
+ recv_data(sd, &response, sizeof(char));
+ if(response != DUPLICATION_COMPLETE) {
+ //fail message
+ }
+ /* NOTE(review): dupeptr is not freed here; ownership of the buffer returned
+ * through mhashGetDuplicate() is not visible from this file - confirm */
+ ctrl = DUPLICATION_COMPLETE;
+ send_data((int)acceptfd, &ctrl, sizeof(char));
+ printf("%s (DUPLICATE_ORIGINAL)-> Finished\n", __func__);
+ freeSockWithLock(transRequestSockPool, mid, sd);
+ break;
+
+ case DUPLICATE_BACKUP:
+ printf("%s (DUPLICATE_BACKUP)-> Attempt to duplicate backup objects\n", __func__);
+ //object store stuffffff
+ /* Same flow as DUPLICATE_ORIGINAL, but mhashGetDuplicate() is called with 1 */
+ recv_data((int)acceptfd, &mid, sizeof(unsigned int));
+ tempsize = mhashGetDuplicate(&dupeptr, 1);
+
+ printf("tempsize:%d, dupeptrfirstvalue:%d\n", tempsize, *((unsigned int *)(dupeptr)));
+ //send control and dupes after
+ ctrl = RECEIVE_DUPES;
+ /* NOTE(review): a negative sd is only logged; it is still used below */
+ if((sd = getSockWithLock(transRequestSockPool, mid)) < 0) {
+ printf("DUPLICATE_BACKUP: socket create error\n");
+ //usleep(1000);
+ }
+
+ printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
+ send_data(sd, &ctrl, sizeof(char));
+ send_data(sd, dupeptr, tempsize);
+ recv_data(sd, &response, sizeof(char));
+ if(response != DUPLICATION_COMPLETE) {
+ //fail message
+ }
+ ctrl = DUPLICATION_COMPLETE;
+ send_data((int)acceptfd, &ctrl, sizeof(char));
+ printf("%s (DUPLICATE_BACKUP)-> Finished\n", __func__);
+
+ freeSockWithLock(transRequestSockPool, mid, sd);
+ break;
+
+ case RECEIVE_DUPES:
+ /* Install the incoming duplicates, then acknowledge to the sender */
+ if((val = readDuplicateObjs((int)acceptfd)) != 0) {
+ printf("Error: In readDuplicateObjs() %s, %d\n", __FILE__, __LINE__);
+ pthread_exit(NULL);
+ }
+ ctrl = DUPLICATION_COMPLETE;
+ send_data((int)acceptfd, &ctrl, sizeof(char));
+ break;
+
+ case PAXOS_PREPARE:
+ /* Reject proposal numbers not above n_h; otherwise record the number and
+ * reply OK followed by n_a and v_a */
+ recv_data((int)acceptfd, &val, sizeof(int));
+ printf("%s (PAXOS_PREPARE)-> prop n:%d, n_h:%d\n", __func__, val, n_h);
+ if (val <= n_h) {
+ control = PAXOS_PREPARE_REJECT;
+ send_data((int)acceptfd, &control, sizeof(char));
+ }
+ else {
+ n_h = val;
+ control = PAXOS_PREPARE_OK;
+ printf("%s (PAXOS_PREPARE)-> n_h now:%d, sending OK\n", __func__, n_h);
+ send_data((int)acceptfd, &control, sizeof(char));
+ send_data((int)acceptfd, &n_a, sizeof(int));
+ send_data((int)acceptfd, &v_a, sizeof(int));
+ }
+ break;
+
+ case PAXOS_ACCEPT:
+ /* Reject when n is below n_h; otherwise store n and v as the accepted pair */
+ recv_data((int)acceptfd, &n, sizeof(int));
+ recv_data((int)acceptfd, &v, sizeof(int));
+ if (n < n_h) {
+ control = PAXOS_ACCEPT_REJECT;
+ send_data((int)acceptfd, &control, sizeof(char));
+ }
+ else {
+ n_a = n;
+ v_a = v;
+ n_h = n;
+ control = PAXOS_ACCEPT_OK;
+ send_data((int)acceptfd, &control, sizeof(char));
+ }
+ break;
+
+ case PAXOS_LEARN:
+ /* NOTE(review): the received value v is not used; leader is taken from the
+ * locally stored v_a - confirm this is intended */
+ recv_data((int)acceptfd, &v, sizeof(int));
+ leader = v_a;
+ paxosRound++;
+ printf("%s (PAXOS_LEARN)-> This is my leader!: [%s]\n", __func__, midtoIPString(leader));
+ break;
+
+ case DELETE_LEADER:
+ v_a = 0;
+ break;
+
+ default:
+ printf("Error: dstmAccept() Unknown opcode %d at %s, %d\n", control, __FILE__, __LINE__);
+ }
+ }
+ printf("%s-> Exiting\n", __func__); fflush(stdout);
+closeconnection:
+ /* Close connection */
+ if (close((int)acceptfd) == -1)
+ perror("close");
+ pthread_exit(NULL);
+}
+
+/* Receives a batch of duplicated objects from acceptfd and installs a copy of
+ * each one in the main object store and the machine lookup table.
+ * Data read from the socket: the object count, the total byte size, then the
+ * objects packed back to back (each an objheader_t followed by GETSIZE bytes).
+ * Returns 0 on success (including an empty batch) and 1 on allocation failure. */
+int readDuplicateObjs(int acceptfd) {
+  int numoid, i, size, tmpsize;
+  int rc = 0;
+  unsigned int oid;
+  char *dupeptr, *ptr;
+  void *ptrcreate;
+  objheader_t *header;
+
+  printf("%s-> Start\n", __func__);
+  recv_data((int)acceptfd, &numoid, sizeof(unsigned int));
+  recv_data((int)acceptfd, &size, sizeof(int));
+  printf("%s-> numoid:%d, size:%d\n", __func__, numoid, size);
+
+  if (numoid == 0) {
+    printf("%s-> No objects duplicated\n", __func__);
+    return 0;
+  }
+
+  /* Staging buffer for the whole batch; released before returning */
+  if ((dupeptr = calloc(1, size)) == NULL) {
+    printf("calloc error for duplicated objects %s, %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+  recv_data((int)acceptfd, dupeptr, size);
+
+  ptr = dupeptr;
+  for (i = 0; i < numoid; i++) {
+    header = (objheader_t *)ptr;
+    oid = OID(header);
+    GETSIZE(tmpsize, header);
+    tmpsize += sizeof(objheader_t);
+    printf("%s-> oid being received/backed:%u, version:%d, type:%d\n", __func__, oid, header->version, TYPE(header));
+    printf("STATUSPTR(header):%u, STATUS:%d\n", STATUSPTR(header), STATUS(header));
+
+    pthread_mutex_lock(&mainobjstore_mutex);
+    ptrcreate = objstrAlloc(&mainobjstore, tmpsize);
+    pthread_mutex_unlock(&mainobjstore_mutex);
+    if (ptrcreate == NULL) {
+      printf("Error: readDuplicateObjs() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
+      rc = 1;
+      break;
+    }
+
+    /* The lookup table refers to the copy in the object store, not to dupeptr */
+    memcpy(ptrcreate, header, tmpsize);
+    mhashInsert(oid, ptrcreate);
+    ptr += tmpsize;
+  }
+
+  /* Previously leaked on both the success and the failure path */
+  free(dupeptr);
+
+  if (rc == 0)
+    printf("%s-> End\n", __func__);
+  return rc;
+}
+
+/* Reads the remainder of a TRANS_REQUEST message from acceptfd and hands it to
+ * processClientReq().
+ * Data read, in order: the fixed_data_t record minus its first byte (the
+ * control byte was consumed by the caller), the list of machine ids, the
+ * (oid, version) tuples of objects only read, and the modified objects.
+ * Returns 0 on success and 1 on allocation failure or when
+ * processClientReq() reports an error. */
+int readClientReq(trans_commit_data_t *transinfo, int acceptfd) {
+  char *ptr;
+  void *modptr = NULL;      /* stays NULL when no objects were modified */
+  unsigned int *oidmod = NULL, oid;
+  fixed_data_t fixed;
+  objheader_t *headaddr;
+  int i, size, val;
+
+  printf("%s-> Entering\n", __func__);
+
+  /* Read fixed_data_t data structure */
+  size = sizeof(fixed) - 1;
+  ptr = (char *)&fixed;
+  fixed.control = TRANS_REQUEST;
+  recv_data((int)acceptfd, ptr+1, size);
+
+  /* Read list of mids (array is never declared with zero length) */
+  int mcount = fixed.mcount;
+  size = mcount * sizeof(unsigned int);
+  unsigned int listmid[mcount > 0 ? mcount : 1];
+  ptr = (char *) listmid;
+  recv_data((int)acceptfd, ptr, size);
+
+  /* Read oid and version tuples for those objects that are not modified in the transaction */
+  int numread = fixed.numread;
+  size = numread * (sizeof(unsigned int) + sizeof(unsigned short));
+  char objread[size > 0 ? size : 1];
+  if (numread != 0) {
+    recv_data((int)acceptfd, objread, size);
+  }
+
+  /* Read modified objects */
+  if (fixed.nummod != 0) {
+    if ((modptr = calloc(1, fixed.sum_bytes)) == NULL) {
+      printf("calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
+      return 1;
+    }
+    size = fixed.sum_bytes;
+    recv_data((int)acceptfd, modptr, size);
+  }
+
+  /* Create an array of oids for modified objects. At least one element is
+   * requested so that a NULL result always means a real allocation failure
+   * (calloc(0, ...) may legitimately return NULL). */
+  oidmod = calloc(fixed.nummod != 0 ? fixed.nummod : 1, sizeof(unsigned int));
+  if (oidmod == NULL) {
+    printf("calloc error %s, %d\n", __FILE__, __LINE__);
+    free(modptr);   /* previously leaked on this path */
+    return 1;
+  }
+
+  /* Walk the packed modified objects and record each oid */
+  ptr = (char *) modptr;
+  for (i = 0; i < fixed.nummod; i++) {
+    int tmpsize;
+    headaddr = (objheader_t *) ptr;
+    oid = OID(headaddr);
+    oidmod[i] = oid;
+    GETSIZE(tmpsize, headaddr);
+    ptr += sizeof(objheader_t) + tmpsize;
+  }
+
+  printf("%s-> num oid read = %d, oids modified = %d, size = %d\n", __func__, fixed.numread, fixed.nummod, size); fflush(stdout);
+
+  /* Process the information read */
+  val = processClientReq(&fixed, transinfo, listmid, objread, modptr, oidmod, acceptfd);
+  free(oidmod);
+  if (val != 0) {
+    printf("Error: In processClientReq() %s, %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  printf("%s-> Exiting\n", __func__);
+  return 0;
+}
+
+/* Votes on the coordinator's transaction request through handleTransReq()
+ * (which sends the reply), then receives the coordinator's follow-up control
+ * byte and acts on it:
+ *   TRANS_ABORT  - frees modptr (when nummod > 0) and releases every lock
+ *                  recorded in transinfo->objlocked; entries after the -1
+ *                  marker are released with write_unlock, earlier ones with
+ *                  read_unlock
+ *   TRANS_COMMIT - runs transCommitProcess()
+ * On every path after the vote, transinfo->objlocked and
+ * transinfo->objnotfound are freed and reset to NULL.
+ * Returns 0 on success and 1 on error. */
+int processClientReq(fixed_data_t *fixed, trans_commit_data_t *transinfo,
+                     unsigned int *listmid, char *objread, void *modptr, unsigned int *oidmod, int acceptfd) {
+
+  char control, retval;
+  void *header;
+  int i = 0, val;
+  int useWriteUnlock = 0;
+  int rc = 0;
+
+  printf("%s-> Entering\n", __func__);
+  /* Send reply to the Coordinator */
+  if((retval = handleTransReq(fixed, transinfo, listmid, objread, modptr,acceptfd)) == 0 ) {
+    printf("Error: In handleTransReq() %s, %d\n", __FILE__, __LINE__);
+    printf("DEBUG-> Exiting processClientReq, line = %d\n", __LINE__);
+    return 1;
+  }
+
+  recv_data((int)acceptfd, &control, sizeof(char));
+  /* Process the new control message */
+  switch(control) {
+  case TRANS_ABORT:
+    if (fixed->nummod > 0)
+      free(modptr);
+    /* Unlock objects that were locked due to this transaction */
+    for(i = 0; i< transinfo->numlocked; i++) {
+      if(transinfo->objlocked[i] == (unsigned int)-1) {
+        /* marker: everything after it holds a write lock */
+        useWriteUnlock = 1;
+        continue;
+      }
+      if((header = mhashSearch(transinfo->objlocked[i])) == NULL) {
+        printf("mhashSearch returns NULL at %s, %d\n", __FILE__, __LINE__); // find the header address
+        /* Previously returned here without freeing the transinfo arrays */
+        rc = 1;
+        break;
+      }
+      if(useWriteUnlock) {
+        write_unlock(STATUSPTR(header));
+      } else {
+        read_unlock(STATUSPTR(header));
+      }
+    }
+    break;
+
+  case TRANS_COMMIT:
+    /* Invoke the transCommit process() */
+    if((val = transCommitProcess(modptr, oidmod, transinfo->objlocked, fixed->nummod, transinfo->numlocked, (int)acceptfd)) != 0) {
+      printf("Error: In transCommitProcess() %s, %d\n", __FILE__, __LINE__);
+      rc = 1;
+    }
+    break;
+
+  case TRANS_ABORT_BUT_RETRY_COMMIT_WITH_RELOCATING:
+    break;
+
+  default:
+    printf("Error: No response to TRANS_AGREE OR DISAGREE protocol %s, %d\n", __FILE__, __LINE__);
+    //TODO Use fixed.trans_id TID since Client may have died
+    break;
+  }
+
+  /* Free memory; clear the pointers so nothing is left dangling in transinfo */
+  free(transinfo->objlocked);
+  transinfo->objlocked = NULL;
+  free(transinfo->objnotfound);
+  transinfo->objnotfound = NULL;
+  printf("%s-> Exiting, line:%d\n", __func__, __LINE__);
+
+  return rc;
+}
+
+/* Runs the voting decision over every object named in a TRANS_REQUEST and
+ * sends the participant's reply to the coordinator on acceptfd.
+ *   objread - packed (oid, version) tuples for the fixed->numread read-only objects
+ *   modptr  - packed modified objects (fixed->nummod of them)
+ * If any version mismatches, the locks taken so far are released and the
+ * control byte set by the vote helpers (TRANS_DISAGREE) is sent at once (plus
+ * the current copies of the mismatching objects when CACHE is defined).
+ * Otherwise decideCtrlMessage() picks and sends the reply and stores the
+ * lock / not-found arrays in transinfo, which then owns them.
+ * Returns the control byte sent, or 0 on error. */
+char handleTransReq(fixed_data_t *fixed, trans_commit_data_t *transinfo, unsigned int *listmid, char *objread, void *modptr, int acceptfd) {
+  int i, j;
+  unsigned short version;
+  char control = 0, *ptr;
+  unsigned int oid;
+  unsigned int *oidnotfound, *oidlocked, *oidvernotmatch;
+  objheader_t *headptr;
+  int total = fixed->numread + fixed->nummod;
+  int objnotfound = 0, objlocked = 0, objvernotmatch = 0;
+  int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
+  int numBytes = 0;
+
+  /* Counters and arrays to formulate decision on control message to be sent.
+   * oidlocked needs one extra slot for the -1 read/write marker; the others
+   * get the same size so a zero-length request never asks calloc for 0 items. */
+  oidnotfound = calloc(total + 1, sizeof(unsigned int));
+  oidlocked = calloc(total + 1, sizeof(unsigned int));
+  oidvernotmatch = calloc(total + 1, sizeof(unsigned int));
+  if (oidnotfound == NULL || oidlocked == NULL || oidvernotmatch == NULL) {
+    printf("calloc error %s, %d\n", __FILE__, __LINE__);
+    free(oidnotfound);
+    free(oidlocked);
+    free(oidvernotmatch);
+    return 0;
+  }
+
+  /* modptr points to the beginning of the object store
+   * created at the Participant.
+   * Object store holds the modified objects involved in the transaction request */
+  ptr = (char *) modptr;
+
+  /* Process each oid in the machine pile/ group per thread */
+  for (i = 0; i < total; i++) {
+    if (i < fixed->numread) { //Objs only read and not modified
+      int incr = sizeof(unsigned int) + sizeof(unsigned short); // Offset that points to next position in the objread array
+      incr *= i;
+      oid = *((unsigned int *)(objread + incr));
+      incr += sizeof(unsigned int);
+      version = *((unsigned short *)(objread + incr));
+      getCommitCountForObjRead(oidnotfound, oidlocked, oidvernotmatch, &objnotfound, &objlocked, &objvernotmatch,
+                               &v_matchnolock, &v_matchlock, &v_nomatch, &numBytes, &control, oid, version);
+    } else { //Objs modified
+      if(i == fixed->numread) {
+        /* marker separating read locks (before) from write locks (after) */
+        oidlocked[objlocked++] = -1;
+      }
+      int tmpsize;
+      headptr = (objheader_t *) ptr;
+      oid = OID(headptr);
+      version = headptr->version;
+      GETSIZE(tmpsize, headptr);
+      ptr += sizeof(objheader_t) + tmpsize;
+      getCommitCountForObjMod(oidnotfound, oidlocked, oidvernotmatch, &objnotfound,
+                              &objlocked, &objvernotmatch, &v_matchnolock, &v_matchlock, &v_nomatch,
+                              &numBytes, &control, oid, version);
+    }
+  }
+
+  /* send TRANS_DISAGREE and objs*/
+  if(v_nomatch > 0) {
+    char result = control;
+    int useWriteUnlock = 0;
+#ifdef CACHE
+    /* Collect the current copy of every object whose version did not match */
+    char *objs = calloc(1, numBytes);
+    int offset = 0;
+    if (objs == NULL) {
+      printf("calloc error %s, %d\n", __FILE__, __LINE__);
+      result = 0;
+    }
+    for(j = 0; result != 0 && j < objvernotmatch; j++) {
+      objheader_t *header = mhashSearch(oidvernotmatch[j]);
+      int size = 0;
+      GETSIZE(size, header);
+      size += sizeof(objheader_t);
+      memcpy(objs+offset, header, size);
+      offset += size;
+    }
+#endif
+    /* Release every lock acquired while voting */
+    for(j = 0; j < objlocked; j++) {
+      if(oidlocked[j] == (unsigned int)-1) {
+        useWriteUnlock = 1;
+        continue;
+      }
+      if((headptr = mhashSearch(oidlocked[j])) == NULL) {
+        printf("mhashSearch returns NULL at %s, %d\n", __FILE__, __LINE__);
+        result = 0;
+        break;
+      }
+      if(useWriteUnlock) {
+        write_unlock(STATUSPTR(headptr));
+      } else {
+        read_unlock(STATUSPTR(headptr));
+      }
+    }
+
+    if (result != 0) {
+      printf("control = %d, file = %s, line = %d\n", (int)control, __FILE__, __LINE__);
+      send_data(acceptfd, &control, sizeof(char));
+#ifdef CACHE
+      send_data(acceptfd, &numBytes, sizeof(int));
+      send_data(acceptfd, objs, numBytes);
+      if(timeoutFlag) {
+        printf("send_data: remote machine dead, line:%d\n", __LINE__);
+        timeoutFlag = 0;
+        exit(1);
+      }
+      /* The array is freed below, so do not leave its address in transinfo */
+      transinfo->objvernotmatch = NULL;
+      transinfo->numvernotmatch = objvernotmatch;
+#endif
+    }
+#ifdef CACHE
+    free(objs);
+#endif
+    /* None of the arrays is handed to transinfo on this path; they used to
+     * leak (oidlocked was only freed when at least one lock was recorded) */
+    free(oidnotfound);
+    free(oidlocked);
+    free(oidvernotmatch);
+    return result;
+  }
+
+  /* Decide what control message to send to Coordinator. decideCtrlMessage()
+   * stores oidlocked and oidnotfound in transinfo; oidvernotmatch is not
+   * used past this point and was previously leaked. */
+  control = decideCtrlMessage(fixed, transinfo, &v_matchnolock, &v_matchlock, &v_nomatch, &objnotfound, &objlocked,
+                              modptr, oidnotfound, oidlocked, acceptfd);
+  free(oidvernotmatch);
+  if (control == 0) {
+    printf("Error: In decideCtrlMessage() %s, %d\n", __FILE__, __LINE__);
+    return 0;
+  }
+  return control;
+}
+
+/* Casts the vote for one MODIFIED object of a TRANS_REQUEST and updates the
+ * caller's tallies:
+ *   - object missing from the machine table: oid appended to oidnotfound
+ *   - write lock acquired: oid appended to oidlocked (even on a version
+ *     mismatch, so the caller can release it) and v_matchnolock incremented
+ *     when versions match
+ *   - write lock not acquired: v_matchlock incremented when versions match
+ *   - version mismatch (either case): v_nomatch incremented, oid appended to
+ *     oidvernotmatch, the object's full size added to numBytes and *control
+ *     set to TRANS_DISAGREE
+ * With RECOVERY defined, version 1 is counted as v_matchnolock without a lookup. */
+void getCommitCountForObjMod(unsigned int *oidnotfound, unsigned int *oidlocked,
+                             unsigned int *oidvernotmatch, int *objnotfound, int *objlocked, int *objvernotmatch,
+                             int *v_matchnolock, int *v_matchlock, int *v_nomatch, int *numBytes,
+                             char *control, unsigned int oid, unsigned short version) {
+  void *mobj;
+#ifdef RECOVERY
+  if(version == 1) {
+    (*v_matchnolock)++;
+    printf("*backup object* oid:%u\n", oid);
+    return;
+  }
+#endif
+
+  /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
+  if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
+    /* mobj is NULL here: report the requested oid rather than reading the
+     * header through OID()/TYPE(), which dereferenced the NULL pointer */
+    printf("Obj not found: %s() oid = %u\n", __func__, oid);
+    fflush(stdout);
+    /* Save the oids not found and number of oids not found for later use */
+    oidnotfound[*objnotfound] = oid;
+    (*objnotfound)++;
+  } else { /* If Obj found in machine (i.e. has not moved) */
+    printf("Obj found: %s() oid = %d, type = %d\t\n", __func__, OID(mobj), TYPE((objheader_t *)mobj));
+    fflush(stdout);
+
+    /* Check if Obj is locked by any previous transaction */
+    int gotlock = (write_trylock(STATUSPTR(mobj)) != 0);
+    if (gotlock) {
+      printf("****%s->Trying to acquire 'remote' writelock for oid:%d, version:%d\n", __func__, oid, version);
+      printf("this version: %d, mlookup version: %d\n", version, ((objheader_t *)mobj)->version);
+    }
+
+    if (version == ((objheader_t *)mobj)->version) { /* match versions */
+      if (gotlock)
+        (*v_matchnolock)++;
+      else
+        (*v_matchlock)++;
+    } else { /* If versions don't match ...HARD ABORT */
+      int size;
+      (*v_nomatch)++;
+      oidvernotmatch[*objvernotmatch] = oid;
+      (*objvernotmatch)++;
+      GETSIZE(size, mobj);
+      size += sizeof(objheader_t);
+      *numBytes += size;
+      /* Send TRANS_DISAGREE to Coordinator */
+      *control = TRANS_DISAGREE;
+    }
+
+    //Keep track of oid locked
+    if (gotlock)
+      oidlocked[(*objlocked)++] = OID(((objheader_t *)mobj));
+  }
+  printf("oid: %u, v_matchnolock: %d, v_matchlock: %d, v_nomatch: %d\n", oid, *v_matchnolock, *v_matchlock, *v_nomatch);
+}
+
+/* Casts the vote for one READ-ONLY object of a TRANS_REQUEST and updates the
+ * caller's tallies:
+ *   - object missing from the machine table: oid appended to oidnotfound
+ *   - read lock acquired: oid appended to oidlocked (even on a version
+ *     mismatch, so the caller can release it) and v_matchnolock incremented
+ *     when versions match
+ *   - read lock not acquired: v_matchlock incremented when versions match
+ *   - version mismatch (either case): v_nomatch incremented, oid appended to
+ *     oidvernotmatch, the object's full size added to numBytes and *control
+ *     set to TRANS_DISAGREE
+ * With RECOVERY defined, version 1 is counted as v_matchnolock without a lookup. */
+void getCommitCountForObjRead(unsigned int *oidnotfound, unsigned int *oidlocked, unsigned int *oidvernotmatch,
+                              int *objnotfound, int *objlocked, int * objvernotmatch, int *v_matchnolock, int *v_matchlock,
+                              int *v_nomatch, int *numBytes, char *control, unsigned int oid, unsigned short version) {
+  void *mobj;
+#ifdef RECOVERY
+  if(version == 1) {
+    (*v_matchnolock)++;
+    printf("*backup object* oid:%u\n", oid);
+    return;
+  }
+#endif
+
+  /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
+  if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
+    /* mobj is NULL here: report the requested oid rather than reading the
+     * header through OID()/TYPE(), which dereferenced the NULL pointer */
+    printf("Obj not found: %s() file:%s oid = %u\n", __func__, __FILE__, oid);
+    fflush(stdout);
+    /* Save the oids not found and number of oids not found for later use */
+    oidnotfound[*objnotfound] = oid;
+    (*objnotfound)++;
+  } else { /* If Obj found in machine (i.e. has not moved) */
+    printf("Obj found: %s() file:%s oid = %d, type = %d\t\n", __func__, __FILE__, OID(mobj), TYPE((objheader_t *)mobj));
+    fflush(stdout);
+
+    /* Check if Obj is locked by any previous transaction; a failed attempt
+     * means some other transaction holds a write lock on it */
+    int gotlock = (read_trylock(STATUSPTR(mobj)) != 0);
+
+    if (version == ((objheader_t *)mobj)->version) { /* match versions */
+      if (gotlock)
+        (*v_matchnolock)++;
+      else
+        (*v_matchlock)++;
+    } else { /* If versions don't match ...HARD ABORT */
+      int size;
+      (*v_nomatch)++;
+      oidvernotmatch[*objvernotmatch] = oid;
+      (*objvernotmatch)++;
+      GETSIZE(size, mobj);
+      size += sizeof(objheader_t);
+      *numBytes += size;
+      /* Send TRANS_DISAGREE to Coordinator */
+      *control = TRANS_DISAGREE;
+    }
+
+    //Keep track of oid locked
+    if (gotlock)
+      oidlocked[(*objlocked)++] = OID(((objheader_t *)mobj));
+  }
+  printf("oid: %u, v_matchnolock: %d, v_matchlock: %d, v_nomatch: %d\n", oid, *v_matchnolock, *v_matchlock, *v_nomatch);
+}
+
+/* Chooses the participant's reply from the vote tallies and sends it on acceptfd.
+ *   TRANS_AGREE      - every read and modified object matched and was lockable
+ *   TRANS_SOFT_ABORT - no version mismatch, but at least one object was
+ *                      already locked or missing; when objects are missing
+ *                      their count and oids follow the control byte
+ * In all cases transinfo is filled with the lock / not-found arrays, their
+ * counts and modptr, as needed if a TRANS_COMMIT arrives later.
+ * Returns the control byte sent, or 0 when neither condition held. */
+char decideCtrlMessage(fixed_data_t *fixed, trans_commit_data_t *transinfo, int *v_matchnolock, int *v_matchlock,
+                       int *v_nomatch, int *objnotfound, int *objlocked, void *modptr,
+                       unsigned int *oidnotfound, unsigned int *oidlocked, int acceptfd) {
+  char reply = 0;
+  int missing = *objnotfound;
+
+  /* Unanimous vote: agree */
+  if (*v_matchnolock == fixed->numread + fixed->nummod) {
+    reply = TRANS_AGREE;
+    printf("control = %d, file = %s, line = %d\n", (int)reply, __FILE__, __LINE__);
+    send_data(acceptfd, &reply, sizeof(char));
+    if (timeoutFlag) {
+      printf("send_data: remote machine dead, line:%d\n", __LINE__);
+      timeoutFlag = 0;
+      exit(1);
+    }
+
+    printf("finished sending control\n");
+  }
+
+  /* No hard mismatch, but something was locked or not found: soft abort */
+  if (*v_nomatch == 0 && (*v_matchlock > 0 || missing > 0)) {
+    reply = TRANS_SOFT_ABORT;
+
+    printf("control = %d, file = %s, line = %d\n", (int)reply, __FILE__, __LINE__);
+    send_data(acceptfd, &reply, sizeof(char));
+
+    /* FIXME how to send objs Send number of oids not found and the missing oids if objects are missing in the machine */
+    if (missing != 0) {
+      int count = missing;
+      send_data(acceptfd, &count, sizeof(int));
+      send_data(acceptfd, oidnotfound, sizeof(unsigned int) * missing);
+    }
+  }
+
+  /* Record what a later TRANS_COMMIT / TRANS_ABORT will need */
+  transinfo->modptr = modptr;
+  transinfo->objlocked = oidlocked;
+  transinfo->numlocked = *objlocked;
+  transinfo->objnotfound = oidnotfound;
+  transinfo->numnotfound = missing;
+  return reply;
+}
+
+/* Applies a TRANS_COMMIT on this participant.
+ *
+ * modptr holds nummod modified objects packed back to back (objheader_t followed
+ * by the object body); oidmod[i] is the oid of the i-th packed object. Each one is
+ * copied over the stored object and its version is incremented. Afterwards modptr
+ * is freed (when nummod > 0) and the locks listed in oidlocked are released.
+ *
+ * oidlocked may contain a -1 marker: entries before it are released with
+ * read_unlock(), entries after it with write_unlock().
+ *
+ * Returns 0 on success, 1 on failure.
+ * NOTE(review): the early "return 1" paths do not free modptr and do not release
+ * the remaining locks -- confirm that callers handle this. */
+int transCommitProcess(void *modptr, unsigned int *oidmod, unsigned int *oidlocked, int nummod, int numlocked, int acceptfd) {
+ objheader_t *header;
+ objheader_t *newheader;
+ int i = 0, offset = 0; /* offset: byte position of the current object inside modptr */
+ char control;
+ int tmpsize;
+ void *ptrcreate;
+ printf("DEBUG-> Entering transCommitProcess, dstmserver.c\n");
+ printf("nummod: %d, numlocked: %d\n", nummod, numlocked);
+
+ /* Process each modified object saved in the mainobject store */
+ for(i = 0; i < nummod; i++) {
+ if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
+#ifndef RECOVERY
+ printf("Error: mhashsearch returns NULL at dstmserver.c %d\n", __LINE__);
+ return 1;
+#else
+ /* RECOVERY build: the object is not stored here yet, so create it from the
+  * copy in modptr and flag it as a backup */
+ printf("DEBUG->*backup* i:%d, nummod:%d\n", i, nummod);
+ header = (objheader_t *)(modptr+offset);
+ header->version += 1;
+ header->isBackup = 1;
+ printf("oid: %u, new header version: %d\n", oidmod[i], header->version);
+ GETSIZE(tmpsize, header);
+ tmpsize += sizeof(objheader_t); /* from here on tmpsize covers header + body */
+ pthread_mutex_lock(&mainobjstore_mutex);
+ if ((ptrcreate = objstrAlloc(&mainobjstore, tmpsize)) == NULL) {
+ printf("Error: transComProcess() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
+ pthread_mutex_unlock(&mainobjstore_mutex);
+ return 1;
+ }
+ pthread_mutex_unlock(&mainobjstore_mutex);
+ /* Initialize read and write locks */
+ initdsmlocks(STATUSPTR(header));
+ memcpy(ptrcreate, header, tmpsize);
+ mhashInsert(oidmod[i], ptrcreate);
+
+ offset += tmpsize;
+#endif
+ }
+ else{
+
+ /* Object already stored here: overwrite its body in place */
+ GETSIZE(tmpsize,header);
+
+ {
+ struct ___Object___ *dst=(struct ___Object___*)((char*)header+sizeof(objheader_t));
+ struct ___Object___ *src=(struct ___Object___*)((char*)modptr+sizeof(objheader_t)+offset);
+ /* Only type, cached code and cached hash are copied from the fixed part of
+  * struct ___Object___; everything after that struct is copied wholesale */
+ dst->type=src->type;
+ dst->___cachedCode___=src->___cachedCode___;
+ dst->___cachedHash___=src->___cachedHash___;
+ memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___));
+ }
+ header->version += 1;
+ printf("oid: %u, new header version: %d\n", oidmod[i], header->version);
+ /* If threads are waiting on this object to be updated, notify them */
+ if(header->notifylist != NULL) {
+ notifyAll(&header->notifylist, OID(header), header->version);
+ }
+ /* here tmpsize is the body size only, so the header size is added separately */
+ offset += sizeof(objheader_t) + tmpsize;
+ }
+}
+ if (nummod > 0)
+ free(modptr);
+
+ /* Unlock locked objects */
+ int useWriteUnlock = 0; /* becomes 1 once the -1 marker has been seen */
+ for(i = 0; i < numlocked; i++) {
+ if(oidlocked[i] == -1) {
+ useWriteUnlock = 1;
+ continue;
+ }
+ if((header = (objheader_t *) mhashSearch(oidlocked[i])) == NULL) {
+ printf("Error: mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+
+ printf("header oid:%d, version:%d, useWriteUnlock:%d\n", OID(header), header->version, useWriteUnlock);
+ if(useWriteUnlock) {
+ write_unlock(STATUSPTR(header));
+ } else {
+ read_unlock(STATUSPTR(header));
+ }
+ }
+ //TODO Update location lookup table
+ return 0;
+}
+
+/* This function receives the oid and offset tuples from the Coordinator's prefetch call.
+ * Looks for the objects to be prefetched in the main object store.
+ * If objects are not found then record those and if objects are found
+ * then use offset values to prefetch references to other objects.
+ *
+ * Request format read from acceptfd, repeated until numoffset == -1:
+ *   int numoffset, then an (oid, mid) pair, then numoffset shorts.
+ * Each response is sent on a socket taken from transPResponseSocketPool for the
+ * requesting mid, as a TRANS_PREFETCH_RESPONSE control byte followed by a buffer:
+ *   int size | char OBJECT_FOUND/OBJECT_NOT_FOUND | unsigned int oid | [header + object]
+ * Always returns 0. */
+
+int prefetchReq(int acceptfd) {
+ int i, size, objsize, numoffset = 0;
+ int length;
+ char *recvbuffer, control;
+ unsigned int oid, mid=-1; /* mid == -1 means no response socket is held yet */
+ objheader_t *header;
+ oidmidpair_t oidmid;
+ int sd = -1;
+
+ while(1) {
+ recv_data((int)acceptfd, &numoffset, sizeof(int));
+ if(numoffset == -1)
+ break;
+ recv_data((int)acceptfd, &oidmid, 2*sizeof(unsigned int));
+ oid = oidmid.oid;
+ /* Switch response sockets only when the requesting machine changes */
+ if (mid != oidmid.mid) {
+ if (mid!=-1) {
+ freeSockWithLock(transPResponseSocketPool, mid, sd);
+ }
+ mid=oidmid.mid;
+ sd = getSockWithLock(transPResponseSocketPool, mid);
+ }
+ /* NOTE(review): numoffset comes straight off the wire and sizes this stack array */
+ short offsetarry[numoffset];
+ recv_data((int) acceptfd, offsetarry, numoffset*sizeof(short));
+
+ /*Process each oid */
+ if ((header = mhashSearch(oid)) == NULL) { /* Obj not found */
+ /* Save the oids not found in buffer for later use */
+ size = sizeof(int) + sizeof(char) + sizeof(unsigned int) ;
+ char sendbuffer[size];
+ *((int *) sendbuffer) = size;
+ *((char *)(sendbuffer + sizeof(int))) = OBJECT_NOT_FOUND;
+ *((unsigned int *)(sendbuffer + sizeof(int) + sizeof(char))) = oid;
+ control = TRANS_PREFETCH_RESPONSE;
+ sendPrefetchResponse(sd, &control, sendbuffer, &size);
+ } else { /* Object Found */
+ int incr = 0;
+ GETSIZE(objsize, header);
+ size = sizeof(int) + sizeof(char) + sizeof(unsigned int) + sizeof(objheader_t) + objsize;
+ char sendbuffer[size];
+ *((int *)(sendbuffer + incr)) = size;
+ incr += sizeof(int);
+ *((char *)(sendbuffer + incr)) = OBJECT_FOUND;
+ incr += sizeof(char);
+ *((unsigned int *)(sendbuffer+incr)) = oid;
+ incr += sizeof(unsigned int);
+ memcpy(sendbuffer + incr, header, objsize + sizeof(objheader_t));
+
+ control = TRANS_PREFETCH_RESPONSE;
+ sendPrefetchResponse(sd, &control, sendbuffer, &size);
+
+ /* Calculate the oid corresponding to the offset value */
+ /* oid and header are overwritten on every iteration, so each offset is
+  * applied to the object reached by the previous one */
+ for(i = 0 ; i< numoffset ; i++) {
+ /* Check for arrays */
+ if(TYPE(header) >= NUMCLASSES) {
+ /* for arrays the offset is an element index, not a byte offset */
+ int elementsize = classsize[TYPE(header)];
+ struct ArrayObject *ao = (struct ArrayObject *) (((char *)header) + sizeof(objheader_t));
+ unsigned short length = ao->___length___;
+ /* Check if array out of bounds */
+ if(offsetarry[i]< 0 || offsetarry[i] >= length) {
+ break;
+ }
+ oid = *((unsigned int *)(((char *)header) + sizeof(objheader_t) + sizeof(struct ArrayObject) + (elementsize*offsetarry[i])));
+ } else {
+ /* for plain objects the offset is a byte offset past the object header */
+ oid = *((unsigned int *)(((char *)header) + sizeof(objheader_t) + offsetarry[i]));
+ }
+
+ /* Don't continue if we hit a NULL pointer */
+ if (oid==0)
+ break;
+
+ if((header = mhashSearch(oid)) == NULL) {
+ /* Report the missing object and stop following this chain */
+ size = sizeof(int) + sizeof(char) + sizeof(unsigned int) ;
+ char sendbuffer[size];
+ *((int *) sendbuffer) = size;
+ *((char *)(sendbuffer + sizeof(int))) = OBJECT_NOT_FOUND;
+ *((unsigned int *)(sendbuffer + sizeof(int) + sizeof(char))) = oid;
+
+ control = TRANS_PREFETCH_RESPONSE;
+ sendPrefetchResponse(sd, &control, sendbuffer, &size);
+ break;
+ } else { /* Obj Found */
+ int incr = 0;
+ GETSIZE(objsize, header);
+ size = sizeof(int) + sizeof(char) + sizeof(unsigned int) + sizeof(objheader_t) + objsize;
+ char sendbuffer[size];
+ *((int *)(sendbuffer + incr)) = size;
+ incr += sizeof(int);
+ *((char *)(sendbuffer + incr)) = OBJECT_FOUND;
+ incr += sizeof(char);
+ *((unsigned int *)(sendbuffer+incr)) = oid;
+ incr += sizeof(unsigned int);
+ memcpy(sendbuffer + incr, header, objsize + sizeof(objheader_t));
+
+ control = TRANS_PREFETCH_RESPONSE;
+ sendPrefetchResponse(sd, &control, sendbuffer, &size);
+ }
+ } //end of for
+ }
+ } //end of while
+ //Release socket
+ if (mid!=-1)
+ freeSockWithLock(transPResponseSocketPool, mid, sd);
+
+ return 0;
+}
+
+/* Sends one prefetch response on socket sd: first the single control byte that
+ * control points to, then *size bytes of sendbuffer. */
+void sendPrefetchResponse(int sd, char *control, char *sendbuffer, int *size) {
+  /* control is a pointer: log the byte it points to, not the address */
+  printf("control = %d, file = %s, line = %d\n", (int)*control, __FILE__, __LINE__);
+  send_data(sd, control, sizeof(char));
+  /* Send the buffer with its size */
+  send_data(sd, sendbuffer, *size);
+}
+
+/* Handles a thread-notification request from machine mid / thread threadid.
+ *
+ * For each of the numoid objects: if the stored version still equals the version
+ * the requester saw (versionarry[i]), the requester is added to the object's
+ * notify list; otherwise the object has already changed and a
+ * THREAD_NOTIFY_RESPONSE (oid, new version, threadid) is sent to mid right away
+ * over a fresh connection to LISTEN_PORT.
+ *
+ * This function frees oidarry and versionarry on every exit path, including the
+ * error paths, and never returns while holding an object's write lock. */
+void processReqNotify(unsigned int numoid, unsigned int *oidarry, unsigned short *versionarry, unsigned int mid, unsigned int threadid) {
+  objheader_t *header;
+  unsigned int oid;
+  unsigned short newversion;
+  char msg[1+ 2 * sizeof(unsigned int) + sizeof(unsigned short)];
+  int sd;
+  struct sockaddr_in remoteAddr;
+  int size;
+  unsigned int i;
+
+  for(i = 0; i < numoid; i++) {
+    oid = oidarry[i];
+    if((header = (objheader_t *) mhashSearch(oid)) == NULL) {
+      printf("Error: mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
+      goto cleanup;
+    }
+
+    /* Retry with a random back-off until the write lock is acquired */
+    while(!write_trylock(STATUSPTR(header))) {
+      randomdelay();
+    }
+    newversion = header->version;
+
+    if(newversion == versionarry[i]) {
+      /* Versions are the same: add the requester to the notify list */
+      int inserted;
+      header->notifylist = insNode(header->notifylist, threadid, mid);
+      inserted = (header->notifylist != NULL);
+      /* release the lock before bailing out, even when the insert failed */
+      write_unlock(STATUSPTR(header));
+      if(!inserted) {
+        printf("Error: Obj notify list points to NULL %s, %d\n", __FILE__, __LINE__);
+        goto cleanup;
+      }
+      continue;
+    }
+
+    /* Object already changed: tell the requester immediately */
+    write_unlock(STATUSPTR(header));
+    if ((sd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+      perror("processReqNotify():socket()");
+      goto cleanup;
+    }
+    bzero(&remoteAddr, sizeof(remoteAddr));
+    remoteAddr.sin_family = AF_INET;
+    remoteAddr.sin_port = htons(LISTEN_PORT);
+    remoteAddr.sin_addr.s_addr = htonl(mid);
+
+    if (connect(sd, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
+      printf("Error: processReqNotify():error %d connecting to %s:%d\n", errno,
+             inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
+      close(sd);
+      goto cleanup;
+    }
+
+    //Send Update notification
+    /* Layout: type byte | oid | new version | threadid. memcpy is used because
+     * the fields are not naturally aligned inside msg. */
+    msg[0] = THREAD_NOTIFY_RESPONSE;
+    size = 1;
+    memcpy(msg + size, &oid, sizeof(unsigned int));
+    size += sizeof(unsigned int);
+    memcpy(msg + size, &newversion, sizeof(unsigned short));
+    size += sizeof(unsigned short);
+    memcpy(msg + size, &threadid, sizeof(unsigned int));
+    size += sizeof(unsigned int);
+    send_data(sd, msg, size);
+    close(sd);
+  }
+
+cleanup:
+  free(oidarry);
+  free(versionarry);
+}
--- /dev/null
+#include "gCollect.h"
+#include "prelookup.h"
+
+
+extern pthread_mutex_t prefetchcache_mutex; //Mutex to lock Prefetch Cache
+extern prehashtable_t pflookup; //Global prefetch cache lookup table
+prefetchNodeInfo_t pNodeInfo; //Global prefetch holding metadata
+
+/* Bytes already handed out from object store x: the distance from the first byte
+ * after the objstr_t header (x+1) to the current allocation pointer (top).
+ * NOTE(review): both pointers are cast to unsigned int, which presumably assumes
+ * 32-bit pointers -- confirm for 64-bit builds. */
+#define OSUSED(x) (((unsigned int)(x)->top)-((unsigned int) (x+1)))
+/* Bytes still available in object store x (its size minus what is in use). */
+#define OSFREE(x) ((x)->size-OSUSED(x))
+
+/* Sets up the global prefetch-cache bookkeeping (pNodeInfo) around a single,
+ * freshly created object store of DEFAULT_OBJ_STORE_SIZE bytes. */
+void initializePCache() {
+  objstr_t *first = objstrCreate(DEFAULT_OBJ_STORE_SIZE);
+
+  /* Active list: the one store is both the newest and the oldest entry.
+   * (for prefetch cache allocated by objstralloc in trans.c file) */
+  pNodeInfo.newptr = first;
+  pNodeInfo.oldptr = first;
+  pNodeInfo.os_count = 1;
+
+  /* Stale list starts out empty */
+  pNodeInfo.newstale = NULL;
+  pNodeInfo.oldstale = NULL;
+  pNodeInfo.stale_count = 0;
+
+  pNodeInfo.stall = 0;
+}
+
+/* Returns an object store able to hold at least size bytes.
+ * A store is recycled from the old end of the stale list when the request fits
+ * a default-sized store, more than STALE_MINTHRESHOLD stale stores exist and the
+ * stall counter has run down to zero; otherwise a new store is created. */
+objstr_t * getObjStr(unsigned int size) {
+  objstr_t *recycled;
+
+  if (pNodeInfo.stall>0)
+    pNodeInfo.stall--;
+
+  /* Any failed recycling condition means we allocate a fresh store */
+  if (size>DEFAULT_OBJ_STORE_SIZE||pNodeInfo.stale_count<=STALE_MINTHRESHOLD||pNodeInfo.stall!=0) {
+    int allocsize=DEFAULT_OBJ_STORE_SIZE;
+    if (size>DEFAULT_OBJ_STORE_SIZE)
+      allocsize=size;
+    return objstrCreate(allocsize);
+  }
+
+  /* Detach the oldest stale store */
+  recycled=pNodeInfo.oldstale;
+  pNodeInfo.oldstale=recycled->prev;
+  if (pNodeInfo.oldstale==NULL)
+    pNodeInfo.newstale=NULL;
+  pNodeInfo.stale_count--;
+
+  /* Reset it: allocation restarts right after the objstr_t header */
+  recycled->top=recycled+1;
+  recycled->prev=NULL;
+  return recycled;
+}
+
+/* Allocates size bytes (rounded up to a multiple of 8) from the prefetch cache.
+ * The newest store and the one after it are tried first; if neither has room a
+ * store is obtained from getObjStr() and linked in as the new newest store.
+ * When the active list then exceeds PREFETCH_FLUSH_THRESHOLD stores, the oldest
+ * one is flushed with clearBlock() and moved to the stale list; when the stale
+ * list exceeds STALE_MAXTHRESHOLD its oldest store is freed.
+ * Returns a pointer to the allocated bytes.
+ * NOTE(review): the result of getObjStr() is used without a NULL check. */
+void *prefetchobjstrAlloc(unsigned int size) {
+ //try existing space in first two OS
+ objstr_t *os=pNodeInfo.newptr;
+ /* round the request up to the next multiple of 8 */
+ if ((size&7)!=0)
+ size+=(8-(size&7));
+ if (size<=OSFREE(os)) {
+ void *tmp=os->top;
+ os->top=((char *)os->top)+size;
+ return tmp;
+ }
+ if ((os=os->next)!=NULL&&(size<=OSFREE(os))) {
+ void *tmp=os->top;
+ os->top=((char *)os->top)+size;
+ return tmp;
+ }
+ //need to allocate new space
+ objstr_t *tmp=getObjStr(size);;
+
+ //link new node in
+ /* next points towards older stores, prev towards newer ones */
+ tmp->next=pNodeInfo.newptr;
+ pNodeInfo.newptr->prev=tmp;
+ pNodeInfo.newptr=tmp;
+ pNodeInfo.os_count++;
+
+ if (pNodeInfo.os_count>PREFETCH_FLUSH_THRESHOLD) {
+ //remove oldest from linked list
+ /* NOTE(review): the next pointer of the new oldest store is not cleared here,
+  * so it still refers to the store being retired */
+ objstr_t *tofree=pNodeInfo.oldptr;
+ pNodeInfo.oldptr=tofree->prev;
+ pNodeInfo.os_count--;
+ //need to flush cache
+ clearBlock(tofree);
+ if (pNodeInfo.newstale==NULL) {
+ //first store
+ pNodeInfo.newstale=pNodeInfo.oldstale=tofree;
+ tofree->prev=NULL;
+ pNodeInfo.stale_count++;
+ } else {
+ //just add it to the list
+ /* the stale list is chained through prev, from oldstale to newstale */
+ pNodeInfo.newstale->prev=tofree;
+ pNodeInfo.newstale=tofree;
+ pNodeInfo.stale_count++;
+ }
+ if (pNodeInfo.stale_count>STALE_MAXTHRESHOLD) {
+ //need to toss a store
+ tofree=pNodeInfo.oldstale;
+ pNodeInfo.oldstale=tofree->prev;
+ pNodeInfo.stale_count--;
+ free(tofree);
+ }
+ }
+
+ /* carve the request out of the store that was just linked in */
+ void *ptr=tmp->top;
+ tmp->top=((char *)tmp->top)+size;
+ return ptr;
+}
+
+/* Removes from the prefetch lookup table (pflookup) every entry whose value
+ * points into the given object store, i.e. into [block, block->top).
+ * Called before a store is retired so that no lookup entry refers to it.
+ * The whole scan runs under pflookup.lock. */
+void clearBlock(objstr_t *block) {
+  /* unsigned long keeps the full pointer value on LP64 as well as ILP32 */
+  unsigned long tmpbegin=(unsigned long)block;
+  unsigned long tmpend=(unsigned long)block->top;
+  int i;
+  prehashlistnode_t *ptr;
+  pthread_mutex_lock(&pflookup.lock);
+
+  ptr = pflookup.table;
+  for(i = 0; i<pflookup.size; i++) {
+    prehashlistnode_t *orig=&ptr[i];
+    prehashlistnode_t *curr=orig;
+
+    /* Pass 1: overflow chain hanging off the bin. After unlinking a node we
+     * stay on curr so that the node which moved up is examined as well. */
+    while (curr->next != NULL) {
+      prehashlistnode_t *next=curr->next;
+      unsigned long val=(unsigned long)next->val;
+      if ((val>=tmpbegin)&&(val<tmpend)) {
+        curr->next=next->next;
+        free(next);
+      } else {
+        curr=next;
+      }
+    }
+
+    /* Pass 2: the bin's own in-table entry, which cannot be freed */
+    {
+      unsigned long val=(unsigned long)orig->val;
+      if ((val>=tmpbegin)&&(val<tmpend)) {
+        if (orig->next==NULL) {
+          /* nothing chained: mark the bin empty */
+          orig->key=0;
+          orig->val=NULL;
+        } else {
+          /* pull the first chained node into the bin */
+          prehashlistnode_t *next=orig->next;
+          orig->val=next->val;
+          orig->key=next->key;
+          orig->next=next->next;
+          free(next);
+        }
+      }
+    }
+  }
+  pthread_mutex_unlock(&pflookup.lock);
+}
+
+/* Allocates a zero-filled object store with room for size payload bytes.
+ * Returns the new store, or NULL (after printing an error) if calloc fails.
+ * NOTE(review): top is set to the END of the payload area, not to its start --
+ * confirm this is intended. */
+objstr_t *allocateNew(unsigned int size) {
+  objstr_t *store = (objstr_t *) calloc(1, (sizeof(objstr_t) +size));
+
+  if (store != NULL) {
+    store->size = size;
+    store->top = (void *)(((unsigned int)store) + sizeof(objstr_t) + size);
+    //Insert newly allocated block into linked list of prefetch cache
+    // Update maxsize of prefetch objstr blocks
+    return store;
+  }
+
+  printf("Error: %s() Calloc error %s %d\n", __func__, __FILE__, __LINE__);
+  return NULL;
+}
--- /dev/null
+#ifndef _GCOLLECT_H
+#define _GCOLLECT_H
+
+#include "dstm.h"
+
+/***********************************
+ ****** Global constants **********
+ **********************************/
+
+#define STALE_MINTHRESHOLD 10 //minimum size
+
+#define STALE_MAXTHRESHOLD 30 //ugly hack..if you make this too small things
+// will fail in odd subtle ways
+
+#define DEFAULT_OBJ_STORE_SIZE (4194304-16) //just a little less the 4MB
+#define PREFETCH_FLUSH_THRESHOLD 10 //MINIMUM SIZE BEFORE FLUSHING
+#define STALL_THRESHOLD 15 //number of prefetches stores before we can start freeing old ones
+
+
+
+/*********************************
+ ********* Global variables ******
+ ********************************/
+/* Bookkeeping for the prefetch cache: a list of active object stores plus a
+ * list of stale (flushed) stores kept around for recycling. */
+typedef struct prefetchNodeInfo {
+ objstr_t *oldptr; // oldest active store (the next one to be flushed)
+ objstr_t *newptr; // newest active store (allocations are tried here first)
+ int os_count; // number of stores in the active list
+
+ objstr_t *oldstale; // oldest stale store (recycled or freed first)
+ objstr_t *newstale; // most recently retired stale store
+ int stale_count; // number of stores in the stale list
+ int stall; // while > 0, getObjStr() decrements it and does not recycle stale stores
+
+} prefetchNodeInfo_t;
+
+/********************************
+ ******** Functions ************
+ *******************************/
+void *prefetchobjstrAlloc(unsigned int size);
+void initializePCache();
+void clearBlock(objstr_t *);
+objstr_t *allocateNew(unsigned int size);
+objstr_t * getObjStr(unsigned int size);
+#endif
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "ip.h"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+
+#define LISTEN_PORT 2156
+
+/* Converts a dotted-quad IPv4 string ("a.b.c.d") into a machine id with a in
+ * the most significant byte and d in the least significant byte.
+ * Returns 0 if addr is NULL, is not four dot-separated numbers, or has an octet
+ * above 255. */
+unsigned int iptoMid(char *addr) {
+  unsigned int a, b, c, d;
+
+  /* Parse into full-width unsigned ints: scanning %d into the short fields of
+   * ip_t would write past them */
+  if (addr == NULL || sscanf(addr, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
+    return 0;
+  if (a > 255 || b > 255 || c > 255 || d > 255)
+    return 0;
+
+  return (a << 24) | (b << 16) | (c << 8) | d;
+}
+
+/* Writes the machine id as a dotted-quad string ("a.b.c.d") into ptr, most
+ * significant byte first. The caller supplies the buffer; up to 16 bytes
+ * (including the terminator) are written. */
+void midtoIP(unsigned int mid, char *ptr) {
+  int first  = (int)((mid >> 24) & 0xff);
+  int second = (int)((mid >> 16) & 0xff);
+  int third  = (int)((mid >> 8) & 0xff);
+  int fourth = (int)(mid & 0xff);
+
+  sprintf(ptr, "%d.%d.%d.%d", first, second, third, fourth);
+}
+
+/* Blocks until the machine identified by mid accepts a TCP connection on
+ * LISTEN_PORT, retrying once per second, then closes the probe connection.
+ * Returns 0 once the server is reachable, -1 if no socket could be created.
+ * machineip is not used: the address is always derived from mid (the original
+ * code copied it into a buffer that was overwritten before use). */
+int checkServer(int mid, char *machineip) {
+  int tmpsd;
+  struct sockaddr_in serv_addr;
+
+  (void)machineip;
+
+  // Foreach machine you want to transact with
+  // check if its up and running
+  if ((tmpsd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+    perror("");
+    return(-1);
+  }
+  memset(&serv_addr, 0, sizeof(serv_addr));
+  serv_addr.sin_family = AF_INET;
+  serv_addr.sin_port = htons(LISTEN_PORT);
+  /* mid holds the address in host byte order, most significant octet first */
+  serv_addr.sin_addr.s_addr = htonl((unsigned int)mid);
+  while (connect(tmpsd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
+    sleep(1);
+  }
+  close(tmpsd);
+  return 0;
+}
+
+/* Looks up the IPv4 address of the named network interface (e.g. "eth0") with
+ * the SIOCGIFADDR ioctl and returns it in host byte order.
+ * Returns 1 if the socket cannot be created or the ioctl fails. */
+unsigned int getMyIpAddr(const char *interfaceStr) {
+  int sock;
+  struct ifreq interfaceInfo;
+  struct sockaddr_in *myAddr = (struct sockaddr_in *)&interfaceInfo.ifr_addr;
+
+  memset(&interfaceInfo, 0, sizeof(struct ifreq));
+
+  if((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+    perror("getMyIpAddr():socket()");
+    return 1;
+  }
+
+  /* ifr_name is a fixed-size array: copy at most size-1 bytes and terminate */
+  strncpy(interfaceInfo.ifr_name, interfaceStr, sizeof(interfaceInfo.ifr_name) - 1);
+  interfaceInfo.ifr_name[sizeof(interfaceInfo.ifr_name) - 1] = '\0';
+  myAddr->sin_family = AF_INET;
+
+  if(ioctl(sock, SIOCGIFADDR, &interfaceInfo) != 0) {
+    perror("getMyIpAddr():ioctl()");
+    close(sock); /* do not leak the descriptor on failure */
+    return 1;
+  }
+
+  close(sock);
+
+  return ntohl(myAddr->sin_addr.s_addr);
+}
+
+/*
+ main() {
+ unsigned int mid;
+ ip_t i;
+ char ip[16];
+
+ memset(ip, 0, 16);
+ mid = iptoMid("192.10.0.1");
+ printf("mid = %x\n", mid);
+ midtoIP(mid, ip);
+ ip[15] = '\0';
+ printf("%s\n",ip);
+ }
+ */
--- /dev/null
+#ifndef _ip_h_
+#define _ip_h_
+
+/* The four octets of a dotted-quad IPv4 address "a.b.c.d"; a is the most
+ * significant byte of the machine id. Each field is a short. */
+typedef struct ip {
+ short a;
+ short b;
+ short c;
+ short d;
+} ip_t;
+
+unsigned int iptoMid(char *);
+void midtoIP(unsigned int, char *);
+int checkServer(int, char *);
+unsigned int getMyIpAddr(const char *interfaceStr);
+
+#endif
--- /dev/null
+/************************************************************************************************
+ IMP NOTE:
+ All llookup hash functions return 0 on success and 1 otherwise (lhashSearch instead returns the mid, or 0 if not found)
+ llookup hash is an array of lhashlistnode_t
+ oid = mid = 0 in a given lhashlistnode_t for each bin in the hash table ONLY if the entry is empty =>
+ the OID's can be any unsigned int except 0
+
+ Uses pthreads. compile using -lpthread option
+ ***************************************************************************************************/
+#include "llookup.h"
+
+#ifdef SIMPLE_LLOOKUP
+
+extern unsigned int *hostIpAddrs;
+extern unsigned int oidsPerBlock;
+
+/* SIMPLE_LLOOKUP variant: no table is built, so both arguments are ignored.
+ * Always returns 0 (success). */
+unsigned int lhashCreate(unsigned int size, float loadfactor) {
+  (void)size;
+  (void)loadfactor;
+  return 0;
+}
+
+/* SIMPLE_LLOOKUP variant: nothing is stored, so both arguments are ignored.
+ * Always returns 0 (success). */
+unsigned int lhashInsert(unsigned int oid, unsigned int mid) {
+  (void)oid;
+  (void)mid;
+  return 0;
+}
+
+/* SIMPLE_LLOOKUP variant: the owning machine is computed from the oid. Oids are
+ * handed out in blocks of oidsPerBlock per host; with oidsPerBlock == 0 every
+ * oid maps to the first host. Returns the entry from hostIpAddrs. */
+unsigned int lhashSearch(unsigned int oid) {
+  unsigned int host = 0;
+
+  if (oidsPerBlock != 0)
+    host = oid / oidsPerBlock;
+  return hostIpAddrs[host];
+}
+
+/* SIMPLE_LLOOKUP variant: there is nothing to remove, so oid is ignored.
+ * Always returns 0 (success). */
+unsigned int lhashRemove(unsigned int oid) {
+  (void)oid;
+  return 0;
+}
+
+#else
+
+lhashtable_t llookup; //Global Hash table
+
+// Builds the global location lookup table: size zeroed bins, the given load
+// factor, no elements yet, and a freshly initialised table mutex.
+// Returns 0 on success and 1 if the bins cannot be allocated.
+unsigned int lhashCreate(unsigned int size, float loadfactor) {
+  lhashlistnode_t *bins = calloc(size, sizeof(lhashlistnode_t));
+
+  if (bins == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  llookup.loadfactor = loadfactor;
+  llookup.numelements = 0; // the table starts out empty
+  llookup.size = size;
+  llookup.table = bins;
+  pthread_mutex_init(&llookup.locktable, NULL);
+  return 0;
+}
+
+// Maps an oid to its bin in the global table: oid modulo the number of bins.
+unsigned int lhashFunction(unsigned int oid) {
+  unsigned int bin = oid % llookup.size;
+  return bin;
+}
+
+// Insert an oid -> mid mapping into the global hash table, growing the table
+// first (to 2*size+1 bins) when the load factor is exceeded.
+// Returns 0 on success and 1 if a chain node cannot be allocated.
+// The table lock is held for the whole operation so that the load check, the
+// table pointer and the element count cannot race with a concurrent resize.
+unsigned int lhashInsert(unsigned int oid, unsigned int mid) {
+  unsigned int index;
+  unsigned int status = 0;
+  lhashlistnode_t *bin, *node;
+
+  pthread_mutex_lock(&llookup.locktable);
+
+  if (llookup.numelements > (llookup.loadfactor * llookup.size)) {
+    //Resize Table. The result is ignored, as before; a resize that fails
+    //at its first allocation leaves the current table unchanged.
+    lhashResize(2 * llookup.size + 1);
+  }
+
+  index = lhashFunction(oid);
+#ifdef DEBUG
+  printf("DEBUG(insert) oid = %d, mid =%d, index =%d\n",oid,mid, index);
+#endif
+  bin = &llookup.table[index];
+  if(bin->next == NULL && bin->oid == 0) { // Bin is empty: use the in-table slot
+    bin->oid = oid;
+    bin->mid = mid;
+  } else { // Insert in the linked list, right after the in-table slot
+    if ((node = calloc(1, sizeof(lhashlistnode_t))) == NULL) {
+      printf("Calloc error %s, %d\n", __FILE__, __LINE__);
+      status = 1;
+    } else {
+      node->oid = oid;
+      node->mid = mid;
+      node->next = bin->next;
+      bin->next = node;
+    }
+  }
+
+  // Count the element only if it was actually stored
+  if (status == 0)
+    llookup.numelements++;
+
+  pthread_mutex_unlock(&llookup.locktable);
+  return status;
+}
+
+// Return mid for a given oid in the hash table, or 0 if the oid is not present.
+// The table pointer is read only after the lock is taken, so a concurrent
+// resize cannot leave us walking a freed table.
+unsigned int lhashSearch(unsigned int oid) {
+  lhashlistnode_t *node;
+  unsigned int mid = 0;
+
+  pthread_mutex_lock(&llookup.locktable);
+  for (node = &llookup.table[lhashFunction(oid)]; node != NULL; node = node->next) {
+    if (node->oid == oid) {
+      mid = node->mid;
+      break;
+    }
+  }
+  pthread_mutex_unlock(&llookup.locktable);
+  return mid;
+}
+
+// Remove the entry for oid from the hash table.
+// Returns 0 if an entry was removed and 1 if the oid was not found.
+// oid 0 marks an empty bin, so it is never treated as a stored key.
+unsigned int lhashRemove(unsigned int oid) {
+  lhashlistnode_t *head, *curr, *node;
+  lhashlistnode_t *prev = NULL;
+
+  if (oid == 0)
+    return 1;
+
+  pthread_mutex_lock(&llookup.locktable);
+  // Read the table only while holding the lock (it may be replaced by a resize)
+  head = &llookup.table[lhashFunction(oid)];
+
+  for (curr = head; curr != NULL; prev = curr, curr = curr->next) {
+    if (curr->oid != oid)
+      continue;
+
+    llookup.numelements--; // Decrement the number of elements in the global hashtable
+    if (curr != head) { // Regular delete from the linked list
+      prev->next = curr->next;
+      free(curr);
+    } else if (curr->next == NULL) { // In-table slot with nothing chained: mark it empty
+      curr->oid = 0;
+      curr->mid = 0;
+    } else { // In-table slot with a chain: pull the first chained node into the slot
+      node = curr->next;
+      curr->oid = node->oid;
+      curr->mid = node->mid;
+      curr->next = node->next;
+      free(node);
+    }
+    pthread_mutex_unlock(&llookup.locktable);
+    return 0;
+  }
+
+  pthread_mutex_unlock(&llookup.locktable);
+  return 1;
+}
+
+// Resize table: allocates a new table of newsize bins, re-inserts every entry of
+// the old table into it, then frees the old chain nodes and the old bin array.
+// Does not take llookup.locktable itself.
+// Returns 0 on success and 1 if an allocation fails.
+// NOTE(review): if an allocation fails part-way through the rehash, the global
+// table has already been switched to the new one and the remaining old entries
+// (and the old bin array) are neither migrated nor freed.
+unsigned int lhashResize(unsigned int newsize) {
+ lhashlistnode_t *node, *ptr, *curr, *next; // curr and next keep track of the current and the next lhashlistnodes in a linked list
+ unsigned int oldsize;
+ int isfirst; // Keeps track of the first element in the lhashlistnode_t for each bin in hashtable
+ int i,index;
+ lhashlistnode_t *newnode;
+
+ ptr = llookup.table;
+ oldsize = llookup.size;
+
+ if((node = calloc(newsize, sizeof(lhashlistnode_t))) == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+
+ llookup.table = node; //Update the global hashtable upon resize()
+ llookup.size = newsize; // lhashFunction() below now hashes against the new size
+ llookup.numelements = 0; // recounted while entries are re-inserted
+
+ for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+ curr = &ptr[i];
+ isfirst = 1;
+ while (curr != NULL) { //Inner loop to go through linked lists
+ if (curr->oid == 0) { //Exit inner loop if there the first element for a given bin/index is NULL
+ break; //oid = mid =0 for element if not present within the hash table
+ }
+ next = curr->next; // saved now because curr may be freed below
+ index = lhashFunction(curr->oid);
+ // Insert into the new table
+ if(llookup.table[index].next == NULL && llookup.table[index].oid == 0) {
+ llookup.table[index].oid = curr->oid;
+ llookup.table[index].mid = curr->mid;
+ llookup.numelements++;
+ } else {
+ if((newnode = calloc(1, sizeof(lhashlistnode_t))) == NULL) {
+ printf("Calloc error %s, %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+ newnode->oid = curr->oid;
+ newnode->mid = curr->mid;
+ newnode->next = llookup.table[index].next;
+ llookup.table[index].next = newnode;
+ llookup.numelements++;
+ }
+
+ //free the linked list of lhashlistnode_t if not the first element in the hash table
+ // (the first element lives inside the old bin array, which is freed as a whole below)
+ if (isfirst != 1) {
+ free(curr);
+ }
+
+ isfirst = 0;
+ curr = next;
+
+ }
+ }
+
+ free(ptr); //Free the memory of the old hash table
+ return 0;
+}
+
+#endif
+
--- /dev/null
+#ifndef _LLOOKUP_H_
+#define _LLOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+
+#define SIMPLE_LLOOKUP
+
+#define LOADFACTOR 0.5
+#define HASH_SIZE 100
+
+// One oid -> mid mapping. The table is an array of these (one per bin); further
+// entries that hash to the same bin are chained through next.
+typedef struct lhashlistnode {
+ unsigned int oid; // object id; 0 marks an empty in-table slot
+ unsigned int mid; // id of the machine associated with the oid
+ struct lhashlistnode *next; // next entry in the same bin, or NULL
+} lhashlistnode_t;
+
+// The location lookup table together with its sizing state and lock.
+typedef struct lhashtable {
+ lhashlistnode_t *table; // points to beginning of hash table
+ unsigned int size; // number of bins in table
+ unsigned int numelements; // number of entries currently stored
+ float loadfactor; // lhashInsert resizes once numelements exceeds loadfactor * size
+ pthread_mutex_t locktable; // taken by the insert, search and remove functions
+} lhashtable_t;
+
+//returns 0 for success and 1 for failure
+unsigned int lhashCreate(unsigned int size, float loadfactor);
+//returns 0 for success and 1 for failure
+unsigned int lhashInsert(unsigned int oid, unsigned int mid);
+//returns mid, 0 if not found
+unsigned int lhashSearch(unsigned int oid);
+//returns 0 for success and 1 for failure
+unsigned int lhashRemove(unsigned int oid);
+
+//helper functions
+unsigned int lhashResize(unsigned int newsize);
+unsigned int lhashFunction(unsigned int oid);
+
+#endif
--- /dev/null
+#include "localobjects.h"
+#include <string.h>
+
+/* Restores obj from the copy saved in obj->___localcopy___.
+ * A ___localcopy___ value of 1 is a marker meaning there is nothing to copy
+ * back: only the two bookkeeping fields are cleared. Otherwise the saved bytes
+ * are copied over obj; the byte count is classsize[type] for a normal object,
+ * and the array header plus length * element size for an array. */
+void REVERT_OBJ(struct ___Object___ * obj) {
+  int type=((int *)obj)[0];
+  struct ___Object___ *saved=obj->___localcopy___;
+  int nbytes;
+
+  if (((int)saved)==1) {
+    obj->___localcopy___=NULL;
+    obj->___nextobject___=NULL;
+    return;
+  }
+
+  if (type>=NUMCLASSES) {
+    /* We have an array: classsize[] holds the element size */
+    struct ArrayObject *arr=(struct ArrayObject *)obj;
+    int elementsize=classsize[type];
+    int length=arr->___length___;
+    nbytes=sizeof(struct ArrayObject)+length*elementsize;
+  } else {
+    /* We have a normal object */
+    nbytes=classsize[type];
+  }
+  memcpy(obj, saved, nbytes);
+}
+
+/* Saves a byte-for-byte copy of obj in freshly allocated memory and records it
+ * in obj->___localcopy___. The copy is classsize[type] bytes for a normal object
+ * and the array header plus length * element size for an array.
+ * With PRECISE_GC the object pointer is passed to the allocator in a small
+ * garbagelist-style array and read back from it afterwards (presumably because
+ * the collector may move the object during allocation -- confirm). */
+#ifdef PRECISE_GC
+void COPY_OBJ(struct garbagelist * gl, struct ___Object___ *obj) {
+#else
+void COPY_OBJ(struct ___Object___ *obj) {
+#endif
+  int type=((int *)obj)[0];
+  int size;
+  struct ___Object___ *newobj;
+
+  /* Work out how many bytes to save before allocating anything */
+  if (type<NUMCLASSES) {
+    /* We have a normal object */
+    size=classsize[type];
+  } else {
+    /* We have an array */
+    struct ArrayObject *ao=(struct ArrayObject *)obj;
+    int elementsize=classsize[type];
+    int length=ao->___length___;
+    size=sizeof(struct ArrayObject)+length*elementsize;
+  }
+
+#ifdef PRECISE_GC
+  {
+    int ptrarray[]={1, (int) gl, (int) obj};
+    newobj=mygcmalloc((struct garbagelist *)ptrarray, size);
+    /* continue with the pointer stored in the array, not the stale local */
+    obj=(struct ___Object___ *) ptrarray[2];
+  }
+#else
+  newobj=FREEMALLOC(size);
+#endif
+
+  memcpy(newobj, obj, size);
+  obj->___localcopy___=newobj;
+}
--- /dev/null
+#ifndef LOCALOBJECT_H
+#define LOCALOBJECT_H
+#include "structdefs.h"
+#include "garbage.h"
+void REVERT_OBJ(struct ___Object___ *);
+#define COMMIT_OBJ(obj) obj->___localcopy___=NULL; \
+ obj->___nextobject___=NULL
+
+#ifdef PRECISE_GC
+void COPY_OBJ(struct garbagelist * gl, struct ___Object___ *obj);
+#else
+void COPY_OBJ(struct ___Object___ *obj);
+#endif
+#endif
--- /dev/null
+#include "machinepile.h"
+
+/* Allocates an objpile_t for one prefetch request and links it in front of next.
+ * The offset array is referenced, not copied. */
+static objpile_t *newObjPile(unsigned int oid, short numoffset, short *offset, objpile_t *next) {
+  objpile_t *objnode = (objpile_t *) malloc(sizeof(objpile_t));
+  objnode->offset = offset;
+  objnode->oid = oid;
+  objnode->numoffset = numoffset;
+  objnode->next = next;
+  return objnode;
+}
+
+/* Adds the prefetch request (oid, offset[0..numoffset-1]) destined for machine
+ * mid to the pile list at *head.
+ *
+ * Machine piles are kept sorted by mid; inside a pile, requests are sorted by
+ * oid and then by their offset sequence. A request that is a prefix of (or equal
+ * to) an existing one is dropped, because the existing one already covers it; a
+ * request that extends an existing one replaces that entry's offsets.
+ * The offset array is stored by reference and is not owned by the pile. */
+void insertPile(int mid, unsigned int oid, short numoffset, short *offset, prefetchpile_t **head) {
+  objpile_t **slot;
+
+  //Loop through the machines
+  for(; 1; head=&((*head)->next)) {
+    int tmid;
+    if ((*head)==NULL||(tmid=(*head)->mid)>mid) {
+      /* No pile for this machine yet: create one in sorted position */
+      prefetchpile_t *pile = (prefetchpile_t *) malloc(sizeof(prefetchpile_t));
+      pile->mid = mid;
+      pile->objpiles = newObjPile(oid, numoffset, offset, NULL);
+      pile->next = *head;
+      *head=pile;
+      return;
+    }
+
+    //keep looking
+    if (tmid < mid)
+      continue;
+
+    //found mid list
+    break;
+  }
+
+  for(slot=&((*head)->objpiles); 1; slot=&((*slot)->next)) {
+    unsigned int toid;
+    int i;
+    int onumoffset;
+    short *ooffset;
+    int sortsAfter = 0;
+
+    if ((*slot)==NULL||((toid=(*slot)->oid)>oid)) {
+      *slot = newObjPile(oid, numoffset, offset, *slot);
+      return;
+    }
+    if (toid < oid)
+      continue;
+
+    /* Same oid: compare the offset sequences element by element */
+    onumoffset=(*slot)->numoffset;
+    ooffset=(*slot)->offset;
+
+    for(i=0; i<numoffset; i++) {
+      if (i>=onumoffset) {
+        //We've matched every existing offset, let's just extend the current prefetch.
+        //(>= rather than >: ooffset[onumoffset] is already out of bounds)
+        (*slot)->numoffset=numoffset;
+        (*slot)->offset=offset;
+        return;
+      }
+      if (ooffset[i]<offset[i]) {
+        /* new request sorts after this entry: move on to the next one */
+        sortsAfter = 1;
+        break;
+      } else if (ooffset[i]>offset[i]) {
+        //Place item before the current one
+        *slot = newObjPile(oid, numoffset, offset, *slot);
+        return;
+      }
+    }
+    //if we get to the end, we're already covered by this prefetch
+    if (!sortsAfter)
+      return;
+  }
+}
--- /dev/null
+#ifndef _MACHINEPILE_H_
+#define _MACHINEPILE_H_
+
+#include "mcpileq.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+void insertPile(int, unsigned int, short, short *, prefetchpile_t **);
+
+#endif
--- /dev/null
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "dstm.h"
+
+#define size 1000000
+
+
+obj_addr_table_t mlut;
+int classsize[]={sizeof(int),sizeof(char),sizeof(short), sizeof(void *)};
+
+/* Small driver: initialises the DSTM runtime, creates an object store and the
+ * lookup hash, then creates one object of each of the four types in classsize[]
+ * followed by a second object of type 3. */
+int main() {
+  int type = 0;
+
+  dstm_init();
+  create_objstr(size);
+  createHash(&mlut, HASH_SIZE, 0.75);
+
+  while (type < 4) {
+    createObject(type);
+    type++;
+  }
+
+  createObject(3);
+  return 0;
+}
--- /dev/null
+#include "mcpileq.h"
+
+mcpileq_t mcqueue; //Global queue
+
+/* Prepares the global machine pile queue, which holds prefetch oids and offset
+ * values grouped by remote machine id. */
+void mcpileqInit(void) {
+  /* The queue starts out empty */
+  mcqueue.rear = NULL;
+  mcqueue.front = NULL;
+
+  /* The queue lock is created as a recursive mutex */
+  pthread_mutexattr_init(&mcqueue.qlockattr);
+  pthread_mutexattr_settype(&mcqueue.qlockattr, PTHREAD_MUTEX_RECURSIVE_NP);
+  pthread_mutex_init(&mcqueue.qlock,&mcqueue.qlockattr);
+
+  /* Condition variable with default attributes */
+  pthread_cond_init(&mcqueue.qcond, NULL);
+}
+
+/* Appends the chain node..tail to the rear of the machine pile queue.
+ * node is the first element of the chain and tail its last one. */
+void mcpileenqueue(prefetchpile_t *node, prefetchpile_t *tail) {
+  if (mcqueue.front != NULL)
+    mcqueue.rear->next = node; /* hook the chain behind the current rear */
+  else
+    mcqueue.front = node;      /* empty queue: the chain becomes the queue */
+  mcqueue.rear = tail;
+}
+
+/* Detaches and returns the node at the front of the machine pile queue.
+ * The returned node's next pointer is cleared. Prints an underflow message and
+ * returns NULL when the queue is empty. */
+prefetchpile_t *mcpiledequeue(void) {
+  prefetchpile_t *first = mcqueue.front;
+
+  if (first != NULL) {
+    prefetchpile_t *rest = first->next;
+    mcqueue.front = rest;
+    if (rest == NULL)
+      mcqueue.rear = NULL; /* that was the last node */
+    first->next = NULL;
+    return first;
+  }
+
+  printf("Machine pile queue empty: Underflow %s %d\n", __FILE__, __LINE__);
+  return NULL;
+}
+
+/* Print the machine id of every pile in the queue, from front to rear. */
+void mcpiledisplay() {
+  const prefetchpile_t *pile;
+
+  for (pile = mcqueue.front; pile != NULL; pile = pile->next) {
+    //mid is declared unsigned int, so it must be printed with %u
+    printf("Remote machine id = %u\n", pile->mid);
+  }
+}
+
+/* Free a chain of prefetchpile_t nodes together with the objpile_t list
+ * hanging off each of them */
+void mcdealloc(prefetchpile_t *node) {
+  while (node != NULL) {
+    prefetchpile_t *following = node->next;
+    objpile_t *obj = node->objpiles;
+
+    //The offset arrays are not owned by this list, so they are left alone.
+    while (obj != NULL) {
+      objpile_t *objnext = obj->next;
+      free(obj);
+      obj = objnext;
+    }
+
+    free(node);
+    node = following;
+  }
+}
--- /dev/null
+#ifndef _MCPILEQ_H_
+#define _MCPILEQ_H_
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+//Structure to make machine groups when prefetching: one entry per object
+//requested from a given machine
+typedef struct objpile {
+ unsigned int oid; // id of the object this entry refers to
+ short numoffset; // presumably the number of entries in offset -- confirm with callers
+ short *offset; // offset array; not owned by this node (mcdealloc does not free it)
+ struct objpile *next; // next entry in the same pile, NULL at the end
+} objpile_t;
+
+//Structure for prefetching tuples generated by the compiler: groups the
+//object entries that belong to one remote machine
+typedef struct prefetchpile {
+ unsigned int mid; // remote machine id
+ objpile_t *objpiles; // list of object entries for this machine; freed by mcdealloc
+ struct prefetchpile *next; // next machine pile, NULL at the end
+} prefetchpile_t;
+
+//Queue of machine piles; the single global instance is mcqueue
+typedef struct mcpileq {
+ prefetchpile_t *front, *rear; // both NULL when the queue is empty
+ pthread_mutex_t qlock; // initialised as a recursive mutex by mcpileqInit
+ pthread_mutexattr_t qlockattr; // attribute object used to create qlock
+ pthread_cond_t qcond; // condition variable initialised by mcpileqInit
+} mcpileq_t;
+
+void mcpileqInit(void);
+void mcpileenqueue(prefetchpile_t *, prefetchpile_t *);
+prefetchpile_t *mcpiledequeue(void);
+void mcpiledisplay();
+void mcdealloc(prefetchpile_t *);
+
+#endif
--- /dev/null
+#include "mlookup.h"
+
+mhashtable_t mlookup; //Global hash table
+
+// Set up the global machine lookup table with "size" bins.
+// Returns 0 on success and 1 when the bin array cannot be allocated.
+unsigned int mhashCreate(unsigned int size, double loadfactor) {
+  mhashlistnode_t *bins = calloc(size, sizeof(mhashlistnode_t));
+
+  // Allocation of the bin array failed
+  if (bins == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  mlookup.table = bins;
+  mlookup.numelements = 0; // The table starts out empty
+  mlookup.size = size;
+  mlookup.loadfactor = loadfactor;
+  mlookup.threshold = size*loadfactor;
+  // The bin index is (key & mask) >> 1, so the mask is one bit wider than size
+  mlookup.mask = (size << 1) - 1;
+  //Initialize the pthread_mutex variable
+  pthread_mutex_init(&mlookup.locktable, NULL);
+  return 0;
+}
+
+// Map a key to its bin index inside the hash table
+unsigned int mhashFunction(unsigned int key) {
+  unsigned int masked = key & mlookup.mask;
+  return masked >> 1;
+}
+
+// Insert a key -> val mapping into the global hash table.
+// The table is doubled first when the element count exceeds the threshold.
+// A key of 0 marks an unused bin head, so key 0 cannot be stored reliably.
+// If a chain node cannot be allocated the mapping is dropped, an error is
+// printed, and the element count is left unchanged.
+void mhashInsert(unsigned int key, void *val) {
+  mhashlistnode_t *bin, *node;
+
+  pthread_mutex_lock(&mlookup.locktable);
+  if (mlookup.numelements > mlookup.threshold) {
+    //Resize Table
+    unsigned int newsize = mlookup.size << 1;
+    mhashResize(newsize);
+  }
+
+  bin = &mlookup.table[(key & mlookup.mask) >>1];
+
+  if (bin->key == 0) {
+    // The bin head itself is free: store the mapping in place
+    bin->key = key;
+    bin->val = val;
+  } else {
+    // Bin head is taken: link a new node right behind it
+    node = calloc(1, sizeof(mhashlistnode_t));
+    if (node == NULL) {
+      printf("Calloc error %s %d\n", __FILE__, __LINE__);
+      pthread_mutex_unlock(&mlookup.locktable);
+      return;
+    }
+    node->key = key;
+    node->val = val;
+    node->next = bin->next;
+    bin->next = node;
+  }
+
+  // Count the element only once it is actually in the table
+  mlookup.numelements++;
+  pthread_mutex_unlock(&mlookup.locktable);
+}
+
+// Look up key in the hash table and return the stored val, or NULL when the
+// key is not present. The table lock is held for the whole chain walk.
+void *mhashSearch(unsigned int key) {
+  mhashlistnode_t *cur;
+  void *found = NULL;
+
+  pthread_mutex_lock(&mlookup.locktable);
+  for (cur = &mlookup.table[(key & mlookup.mask)>>1]; cur != NULL; cur = cur->next) {
+    if (cur->key == key) {
+      found = cur->val;
+      break;
+    }
+  }
+  pthread_mutex_unlock(&mlookup.locktable);
+
+  return found;
+}
+
+// Remove the entry for key from the hash table.
+// Returns 0 when an entry was removed and 1 when the key was not found.
+unsigned int mhashRemove(unsigned int key) {
+  int index;
+  mhashlistnode_t *head, *cur, *before = NULL;
+
+  pthread_mutex_lock(&mlookup.locktable);
+  index = mhashFunction(key);
+  head = &mlookup.table[index];
+
+  for (cur = head; cur != NULL; before = cur, cur = cur->next) {
+    if (cur->key != key)
+      continue;
+
+    mlookup.numelements--; // One element fewer in the global hashtable
+    if (cur != head) {
+      // Ordinary chain node: unlink and free it
+      before->next = cur->next;
+      free(cur);
+    } else if (cur->next == NULL) {
+      // Bin head with no chain behind it: just mark the bin empty
+      cur->key = 0;
+      cur->val = NULL;
+    } else {
+      // Bin head with a chain: pull the second node's contents into the head
+      mhashlistnode_t *second = cur->next;
+      cur->key = second->key;
+      cur->val = second->val;
+      cur->next = second->next;
+      free(second);
+    }
+    pthread_mutex_unlock(&mlookup.locktable);
+    return 0;
+  }
+
+  pthread_mutex_unlock(&mlookup.locktable);
+  return 1;
+}
+
+
+
+// Resize table: allocates a new bin array of "newsize" bins, rehashes every
+// entry of the old table into it and frees the old bin array.
+// Returns 0 on success, 1 if the new array cannot be allocated (in which case
+// the table is left untouched).
+// This function does not take mlookup.locktable itself; mhashInsert calls it
+// with the lock already held.
+unsigned int mhashResize(unsigned int newsize) {
+ mhashlistnode_t *node, *ptr, *curr; // curr and next keep track of the current and the next mhashlistnodes in a linked list
+ unsigned int oldsize;
+ int isfirst; // Keeps track of the first element in the mhashlistnode_t for each bin in hashtable
+ unsigned int i,index;
+ unsigned int mask;
+
+ ptr = mlookup.table;
+ oldsize = mlookup.size;
+
+ if((node = calloc(newsize, sizeof(mhashlistnode_t))) == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+
+ mlookup.table = node; //Update the global hashtable upon resize()
+ mlookup.size = newsize;
+ mlookup.threshold=newsize*mlookup.loadfactor;
+ mask=mlookup.mask = (newsize << 1)-1;
+
+ for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+ curr = &ptr[i];
+ isfirst = 1; // the bin head lives inside the old array and must never be freed or relinked on its own
+ do {
+ unsigned int key;
+ mhashlistnode_t *tmp,*next;
+
+ if ((key=curr->key) == 0) { //Exit inner loop if the first element for a given bin/index is empty
+ break; //key = val =0 for element if not present within the hash table
+ }
+ next = curr->next; // saved now because curr may be freed or relinked below
+ index = (key & mask) >>1;
+ tmp=&mlookup.table[index];
+
+ // Insert into the new table
+ if(tmp->key ==0) {
+ // Target bin head is free: copy the entry in place and release the old chain node
+ tmp->key=curr->key;
+ tmp->val=curr->val;
+ if (!isfirst)
+ free(curr);
+ } /*
+
+ NOTE: Add this case if you change this...
+ This case currently never happens because of the way things rehash....
+else if (isfirst) {
+ mhashlistnode_t *newnode = calloc(1, sizeof(mhashlistnode_t));
+ newnode->key = curr->key;
+ newnode->val = curr->val;
+ newnode->next = tmp->next;
+ tmp->next=newnode;
+ } */
+ else {
+ // Target bin head is taken: reuse the old heap node by linking it behind the head.
+ // NOTE(review): this is only safe for chain nodes; if an old bin head ever reached
+ // this branch it would be linked into the new table and then freed with the old array.
+ curr->next=tmp->next;
+ tmp->next=curr;
+ }
+ isfirst = 0;
+ curr = next;
+ } while(curr!=NULL);
+ }
+
+ free(ptr); //Free the memory of the old hash table
+ return 0;
+}
+
+// Return a newly allocated array holding every key stored in the table and
+// write the number of keys to *numKeys. The caller owns the array.
+// On allocation failure an error is printed, *numKeys is set to 0 and NULL is
+// returned. Writes are bounded by *numKeys, so a stale mlookup.numelements
+// only triggers the warning below instead of overrunning the array.
+unsigned int *mhashGetKeys(unsigned int *numKeys) {
+  unsigned int *keys;
+  unsigned int i, keyindex;
+  mhashlistnode_t *curr;
+
+  pthread_mutex_lock(&mlookup.locktable);
+
+  *numKeys = mlookup.numelements;
+  keys = calloc(*numKeys, sizeof(unsigned int));
+  // calloc(0, ...) may legitimately return NULL, so only a non-empty request counts as a failure
+  if (keys == NULL && *numKeys != 0) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    *numKeys = 0;
+    pthread_mutex_unlock(&mlookup.locktable);
+    return NULL;
+  }
+
+  keyindex = 0;
+  for (i = 0; i < mlookup.size; i++) {
+    if (mlookup.table[i].key == 0)
+      continue; // empty bin
+    for (curr = &mlookup.table[i]; curr != NULL; curr = curr->next) {
+      if (keyindex < *numKeys)
+        keys[keyindex] = curr->key;
+      keyindex++; // keep counting so the consistency check below still fires
+    }
+  }
+
+  if (keyindex != *numKeys)
+    printf("mhashGetKeys(): WARNING: incorrect mlookup.numelements value!\n");
+
+  pthread_mutex_unlock(&mlookup.locktable);
+  return keys;
+}
+
+// Serialize every object in the table whose isBackup flag matches "backup".
+// On success *dupeptr points to a calloc'ed buffer laid out as
+//   unsigned int numdupe | int size | numdupe x (objheader_t + object data)
+// and the total buffer length in bytes is returned; the caller owns the buffer.
+// On failure (allocation error, or an object disappeared or grew between the
+// sizing pass and the copy pass) an error is printed, *dupeptr is set to NULL
+// and 0 is returned.
+int mhashGetDuplicate(void **dupeptr, int backup) { //how big?
+  printf("%s-> Start\n", __func__);
+  unsigned int numdupe = 0;
+  unsigned int i = 0;
+  int size = 0, tempsize = 0, copied = 0;
+  objheader_t *header;
+  mhashlistnode_t *node;
+
+  // First pass, under the table lock: record oids and total size
+  pthread_mutex_lock(&mlookup.locktable);
+
+  // Sized while holding the lock so a concurrent resize cannot race with it;
+  // +1 keeps the array length non-zero for an empty table
+  unsigned int capacity = mlookup.numelements + 1;
+  unsigned int oidsdupe[capacity];
+
+  for(i = 0; i < mlookup.size; i++) {
+    if (mlookup.table[i].key == 0)
+      continue; // empty bin
+    for (node = &mlookup.table[i]; node != NULL; node = node->next) {
+      header = (objheader_t *)node->val;
+      if (((header->isBackup && backup) || (!header->isBackup && !backup)) && numdupe < capacity) {
+        oidsdupe[numdupe++] = OID(header);
+        GETSIZE(tempsize, header);
+        size += tempsize + sizeof(objheader_t);
+      }
+    }
+  }
+
+  pthread_mutex_unlock(&mlookup.locktable);
+  printf("%s-> size:%d, numdupe:%d\n", __func__, size, numdupe);
+
+  if(((*dupeptr) = calloc(1, sizeof(unsigned int)+sizeof(int)+size)) == NULL) {
+    printf("calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
+    return 0;
+  }
+
+  // Second pass: write the two-field prefix, then copy each object
+  char *ptr = *dupeptr;
+  *((unsigned int *)(ptr)) = numdupe;
+  ptr += sizeof(unsigned int);
+  *((int *)(ptr)) = size;
+  ptr += sizeof(int);
+  for(i = 0; i < numdupe; i++) {
+    printf("%s-> oid being backed:%u\n", __func__, oidsdupe[i]);
+    header = mhashSearch(oidsdupe[i]);
+    if (header == NULL) {
+      // The object was removed after the first pass released the lock
+      printf("%s-> oid %u no longer in table %s, %d\n", __func__, oidsdupe[i], __FILE__, __LINE__);
+      free(*dupeptr);
+      *dupeptr = NULL;
+      return 0;
+    }
+    printf("new header oid:%d, version:%d\n", OID(header), header->version);
+    printf("STATUSPTR(header):%u, STATUS:%d\n", STATUSPTR(header), STATUS(header));
+
+    GETSIZE(tempsize, header);
+    tempsize += sizeof(objheader_t);
+    if (copied + tempsize > size) {
+      // The object grew since the sizing pass; copying would overrun the buffer
+      printf("%s-> oid %u changed size %s, %d\n", __func__, oidsdupe[i], __FILE__, __LINE__);
+      free(*dupeptr);
+      *dupeptr = NULL;
+      return 0;
+    }
+    memcpy(ptr, header, tempsize);
+    ptr += tempsize;
+    copied += tempsize;
+  }
+
+  printf("%s-> End\n", __func__);
+  return (sizeof(unsigned int) + sizeof(int) + size);
+}
+
--- /dev/null
+#ifndef _MLOOKUP_H_
+#define _MLOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+
+#define MLOADFACTOR 0.25
+#define MHASH_SIZE 1024
+
+#ifdef RECOVERY
+#include "dstm.h"
+#include "dsmlock.h"
+#endif
+
+// One entry of the machine lookup table. The bin array consists of these
+// nodes directly; additional entries of a bin are chained through next.
+typedef struct mhashlistnode {
+ unsigned int key; // lookup key; 0 marks an unused bin head
+ void *val; //this can be cast to another type or used to point to a larger structure
+ struct mhashlistnode *next; // next entry in the same bin, NULL at the end
+} mhashlistnode_t;
+
+// State of the machine lookup hash table (single global instance: mlookup)
+typedef struct mhashtable {
+ mhashlistnode_t *table; // points to beginning of hash table
+ unsigned int size; // number of bins in table
+ unsigned int mask; // (size << 1) - 1; the bin index is (key & mask) >> 1
+ unsigned int numelements; // number of entries currently stored
+ unsigned int threshold; // size * loadfactor; exceeding it triggers a resize on insert
+ double loadfactor; // load factor given at creation time
+ pthread_mutex_t locktable; // guards all accesses to the table
+} mhashtable_t;
+
+unsigned int mhashCreate(unsigned int size, double loadfactor);
+unsigned int mhashFunction(unsigned int key);
+void mhashInsert(unsigned int key, void *val);
+void *mhashSearch(unsigned int key); //returns val, NULL if not found
+unsigned int mhashRemove(unsigned int key); //returns 0 if removed, 1 if not found
+unsigned int mhashResize(unsigned int newsize);
+unsigned int *mhashGetKeys(unsigned int *numKeys);
+
+void mhashPrint();
+
+#endif
+
--- /dev/null
+#include "dstm.h"
+#include "gCollect.h"
+
+/* Bytes already handed out from object store x: distance from the first byte
+ * after the objstr_t header to the current top pointer. Computed on char
+ * pointers so no pointer is truncated through unsigned int, and with the
+ * macro argument fully parenthesized. */
+#define OSUSED(x) ((unsigned int)((char *)(x)->top - (char *)((x) + 1)))
+/* Bytes still available in object store x */
+#define OSFREE(x) ((x)->size-OSUSED(x))
+
+/* Allocate a zeroed object store with room for "size" payload bytes behind
+ * the objstr_t header. Returns NULL (after printing a message) when the
+ * allocation fails. */
+objstr_t *objstrCreate(unsigned int size) {
+  objstr_t *store = calloc(1, (sizeof(objstr_t) + size));
+
+  if (store == NULL) {
+    printf("%s() Calloc error at line %d, %s\n", __func__, __LINE__, __FILE__);
+    return NULL;
+  }
+
+  store->next = NULL;
+  store->size = size;
+  store->top = &store[1]; //first byte after the objstr_t header
+  return store;
+}
+
+//Release every object store in the list that begins at store
+void objstrDelete(objstr_t *store) {
+  objstr_t *cur, *following;
+
+  for (cur = store; cur != NULL; cur = following) {
+    following = cur->next; //read before the node is freed
+    free(cur);
+  }
+}
+
+/* Carve "size" bytes (rounded up to a multiple of 8) out of the object store
+ * list *osptr. Only the first three stores of the list are tried; if none of
+ * them has room, a new store of at least DEFAULT_OBJ_STORE_SIZE bytes is
+ * allocated and pushed onto the front of the list.
+ * Returns a pointer to the reserved bytes, or NULL when a new store is
+ * needed and cannot be allocated. An empty list (*osptr == NULL) is
+ * handled by going straight to the new-store path. */
+void *objstrAlloc(objstr_t **osptr, unsigned int size) {
+  void *tmp;
+  int i;
+  objstr_t *store;
+
+  //Round the request up to the next multiple of 8
+  if ((size&7)!=0) {
+    size+=(8-(size&7));
+  }
+
+  for(i=0, store=*osptr; i<3 && store!=NULL; i++, store=store->next) {
+    if (OSFREE(store)>=size) {
+      tmp=store->top;
+      store->top=((char *)store->top)+size;
+      return tmp;
+    }
+  }
+
+  {
+    unsigned int newsize=size>DEFAULT_OBJ_STORE_SIZE?size:DEFAULT_OBJ_STORE_SIZE;
+    objstr_t *os=(objstr_t *)calloc(1,(sizeof(objstr_t) + newsize));
+    if (os==NULL) {
+      printf("%s() Calloc error at line %d, %s\n", __func__, __LINE__, __FILE__);
+      return NULL;
+    }
+    void *ptr=&os[1];
+    os->next=*osptr;
+    (*osptr)=os;
+    os->size=newsize;
+    os->top=((char *)ptr)+size;
+    return ptr;
+  }
+}
--- /dev/null
+#include "plookup.h"
+#include "ip.h"
+extern int classsize[];
+
+//NOTE: "pile" ptr points to the head of the linked list of the machine pile data structures
+
+/* This function creates a new pile data structure to hold
+ * obj ids of objects modified or read inside a transaction,
+ * no of objects read and no of objects modified
+ * that belong to a single machine.
+ *
+ * "objects" is the capacity, in entries, of each of the three arrays.
+ * Returns the new pile, or NULL (after printing a message) when any
+ * allocation fails; everything allocated so far is released in that case.
+ * The pile must be released with pDelete(). */
+plistnode_t *pCreate(int objects) {
+  plistnode_t *pile;
+
+  //Create main structure
+  if((pile = calloc(1, sizeof(plistnode_t))) == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return NULL;
+  }
+  if ((pile->oidmod = calloc(objects, sizeof(unsigned int))) == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    free(pile);
+    return NULL;
+  }
+  //From here on the member arrays are freed before pile itself, so that
+  //pile is never read after it has been released
+  if ((pile->oidcreated = calloc(objects, sizeof(unsigned int))) == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    free(pile->oidmod);
+    free(pile);
+    return NULL;
+  }
+  if ((pile->objread = calloc(objects, sizeof(unsigned int) + sizeof(short))) == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    free(pile->oidcreated);
+    free(pile->oidmod);
+    free(pile);
+    return NULL;
+  }
+
+  pile->nummod = pile->numread = pile->numcreated = pile->sum_bytes = pile->mid = 0;
+  pile->next = NULL;
+  return pile;
+}
+
+//Return how many machine piles are in the list starting at pile
+int pCount(plistnode_t *pile) {
+  int total = 0;
+  plistnode_t *cur;
+
+  for (cur = pile; cur != NULL; cur = cur->next)
+    total++;
+  return total;
+}
+
+//Copy the mid of each machine pile, in list order, into list[].
+//The caller must provide room for one entry per pile. Always returns 0.
+int pListMid(plistnode_t *pile, unsigned int *list) {
+  unsigned int *out = list;
+  plistnode_t *cur;
+
+  for (cur = pile; cur != NULL; cur = cur->next)
+    *out++ = cur->mid;
+  return 0;
+}
+
+//Free every pile in the list along with the three arrays each one owns
+void pDelete(plistnode_t *pile) {
+  plistnode_t *cur, *following;
+
+  for (cur = pile; cur != NULL; cur = following) {
+    following = cur->next; //read before the node is freed
+    free(cur->oidmod);
+    free(cur->oidcreated);
+    free(cur->objread);
+    free(cur);
+  }
+}
--- /dev/null
+#ifndef _PLOOKUP_H_
+#define _PLOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* This structure is created using a transaction record.
+ * It is filled out with pile information necessary for
+ * participants involved in a transaction.
+ * One node describes the objects of a single machine; pCreate() allocates
+ * the three arrays and pDelete() frees them. */
+typedef struct plistnode {
+ unsigned int mid; /* machine id this pile belongs to */
+ unsigned int numread; /* no of objects read */
+ unsigned int nummod; /* no of objects modified */
+ unsigned int numcreated; /* no of objects created */
+ int sum_bytes; /* total bytes of objects modified */
+ char *objread; /* Pointer to array containing oids of objects read and their version numbers*/
+ unsigned int *oidmod; /* Pointer to array containing oids of modified objects */
+ unsigned int *oidcreated; /* Pointer to array containing oids of newly created objects */
+ struct plistnode *next; /* next machine pile, NULL at the end */
+} plistnode_t;
+
+plistnode_t *pCreate(int);
+int pCount(plistnode_t *pile);
+int pListMid(plistnode_t *pile, unsigned int *list);
+void pDelete(plistnode_t *pile);
+
+#endif
+
--- /dev/null
+#include "prefetch.h"
+#include "prelookup.h"
+#include "sockpool.h"
+#include "gCollect.h"
+
+extern sockPoolHashTable_t *transPrefetchSockPool;
+extern unsigned int myIpAddr;
+extern sockPoolHashTable_t *transPResponseSocketPool;
+extern pthread_mutex_t prefetchcache_mutex;
+extern prehashtable_t pflookup;
+
+
+// Entry point for a new prefetch call: packs the request as
+//   unsigned int oid | short numoffset | numoffset shorts
+// into memory obtained from the prefetch queue and publishes it.
+// The request is silently dropped when the queue has no memory to give.
+void rangePrefetch(unsigned int oid, short numoffset, short *offsets) {
+  int qnodesize = sizeof(unsigned int) + sizeof(unsigned short) + numoffset * sizeof(short);
+  char *slot = (char *) getmemory(qnodesize);
+
+  if (slot != NULL) {
+    char *cursor = slot;
+
+    *((unsigned int *)cursor) = oid;
+    cursor += sizeof(unsigned int);
+    *((short *)cursor) = numoffset;
+    cursor += sizeof(short);
+    memcpy(cursor, offsets, numoffset * sizeof(short));
+    /* Make the filled block visible to the consumer of the queue */
+    movehead(qnodesize);
+  }
+}
+
+/* Endless consumer loop of the prefetch queue: takes the request at the
+ * tail, resolves what it can locally, sends the remainder to the machines
+ * returned by processLocal(), and then releases the queue entry. */
+void *transPrefetchNew() {
+  for (;;) {
+    /* Next request from the prefetch queue */
+    void *node = gettail();
+
+    /* Per-machine lists of what could not be found locally */
+    perMcPrefetchList_t *pilehead = processLocal(node);
+
+    if (pilehead != NULL) {
+      perMcPrefetchList_t *pile;
+
+      /* One prefetch request per machine, over a socket from the shared pool */
+      for (pile = pilehead; pile != NULL; pile = pile->next) {
+        int sd = getSock2(transPrefetchSockPool, pile->mid);
+        sendRangePrefetchReq(pile, sd, myIpAddr);
+      }
+
+      proPrefetchQDealloc(pilehead);
+    }
+
+    // Release the prefetch queue entry
+    inctail();
+  }
+}
+
+/* Walks one queued prefetch request (layout: oid, numoffset, offsets) through
+ * the objects that searchObj() can find on this machine and collects, per
+ * remote machine, the sub-requests for objects that are not available here.
+ * Returns the head of that per-machine list (NULL if nothing has to be
+ * fetched remotely); the caller frees it with proPrefetchQDealloc().
+ *
+ * dfsList is used as a stack of pairs indexed by "top": dfsList[top] is the
+ * oid at that depth and dfsList[top+1] a counter that is compared against
+ * GET_RANGE(offsetarray[top+3]).
+ * NOTE(review): dfsList[0] and dfsList[1] are written unconditionally, so
+ * this presumably expects numoffset >= 2 -- confirm with callers. */
+perMcPrefetchList_t *processLocal(char *ptr) {
+ unsigned int oid = *(GET_OID(ptr));
+ short numoffset = *(GET_NUM_OFFSETS(ptr));
+ short *offsetarray = GET_OFFSETS(ptr);
+ int top;
+ unsigned int dfsList[numoffset];
+ int offstop=numoffset-2; // descending continues only while top is below this
+
+ /* Initialize */
+ perMcPrefetchList_t *head = NULL;
+
+ objheader_t * header = searchObj(oid);
+ if (header==NULL) {
+ //forward prefetch: the base object is not here, pass the whole request on
+ int machinenum = lhashSearch(oid);
+ insertPrefetch(machinenum, oid, numoffset, offsetarray, &head);
+ return head;
+ }
+ dfsList[0]=oid;
+ dfsList[1]=0;
+
+
+ //Start searching the dfsList
+ for(top=0; top>=0;) {
+ // getNextOid yields an oid with the low bit set, 2 when an intermediate
+ // object could not be found, or a value handled as "nothing here" below
+ oid=getNextOid(header, offsetarray, dfsList, top);
+ if (oid&1) {
+ int oldisField=TYPE(header) < NUMCLASSES; // parent was a field access rather than an array
+ top+=2;
+ dfsList[top]=oid;
+ dfsList[top+1]=0;
+ header=searchObj(oid);
+ if (header==NULL) {
+ //forward prefetch
+ int machinenum = lhashSearch(oid);
+
+ // When the parent is a field whose counter has not reached its range, the
+ // forwarded request also includes the parent's offset pair
+ if (oldisField&&(dfsList[top-1]!=GET_RANGE(offsetarray[top+1])))
+ insertPrefetch(machinenum, oid, 2+numoffset-top, &offsetarray[top-2], &head);
+ else
+ insertPrefetch(machinenum, oid, numoffset-top, &offsetarray[top], &head);
+ } else if (top<offstop)
+ //okay to continue going down
+ continue;
+ } else if (oid==2) {
+ //send prefetch first
+ int objindex=top+2;
+ int machinenum = lhashSearch(dfsList[objindex]);
+ insertPrefetch(machinenum, dfsList[objindex], numoffset-top, &offsetarray[top], &head);
+ }
+ //oid is 0
+ //go backwards until we can increment
+ do {
+ do {
+ top-=2;
+ if (top<0)
+ return head;
+ } while(dfsList[top+1] == GET_RANGE(offsetarray[top + 3]));
+
+ header=searchObj(dfsList[top]);
+ //header shouldn't be null unless the object moves away, but allow
+ //ourselves the option to just continue on if we lose the object
+ } while(header==NULL);
+ //increment
+ dfsList[top+1]++;
+ }
+ return head;
+}
+
+#define PBUFFERSIZE 16384
+//#define PBUFFERSIZE 8192 //Used only for Moldyn benchmark
+
+
+/* Serves a prefetch request received from another machine: performs the same
+ * depth-first walk as processLocal(), but additionally hands every object
+ * found here to sendOidFound(), which batches the responses in "buffer" for
+ * socket sd. Sub-requests for objects that are not available on this machine
+ * are collected per machine and returned (NULL if there are none); the
+ * caller frees the list with proPrefetchQDealloc().
+ * Any batched responses are flushed before the walk returns.
+ * NOTE(review): dfsList[0] and dfsList[1] are written unconditionally, so
+ * this presumably expects numoffset >= 2 -- confirm with callers. */
+perMcPrefetchList_t *processRemote(unsigned int oid, short * offsetarray, int sd, short numoffset) {
+ int top;
+ unsigned int dfsList[numoffset]; // stack of (oid, counter) pairs indexed by top
+ char buffer[PBUFFERSIZE]; // response batch shared by all sendOidFound calls below
+ int bufoffset=0; // number of bytes currently batched in buffer
+
+ /* Initialize */
+ perMcPrefetchList_t *head = NULL;
+
+ objheader_t * header = searchObj(oid);
+ int offstop=numoffset-2; // descending continues only while top is below this
+ if (header==NULL) {
+ //forward prefetch: the base object is not here, pass the whole request on
+ int machinenum = lhashSearch(oid);
+ insertPrefetch(machinenum, oid, numoffset, offsetarray, &head);
+ return head;
+ } else {
+ sendOidFound(header, oid, sd, buffer, &bufoffset);
+ }
+
+ dfsList[0]=oid;
+ dfsList[1]=0;
+
+ //Start searching the dfsList
+ for(top=0; top>=0;) {
+ oid=getNextOid(header, offsetarray, dfsList, top);
+ if (oid&1) {
+ int oldisField=TYPE(header) < NUMCLASSES; // parent was a field access rather than an array
+ top+=2;
+ dfsList[top]=oid;
+ dfsList[top+1]=0;
+ header=searchObj(oid);
+ if (header==NULL) {
+ //forward prefetch
+ int machinenum = lhashSearch(oid);
+ // When the parent is a field whose counter has not reached its range, the
+ // forwarded request also includes the parent's offset pair
+ if (oldisField&&(dfsList[top-1]!=GET_RANGE(offsetarray[top+1])))
+ insertPrefetch(machinenum, oid, 2+numoffset-top, &offsetarray[top-2], &head);
+ else
+ insertPrefetch(machinenum, oid, numoffset-top, &offsetarray[top], &head);
+ } else {
+ sendOidFound(header, oid, sd, buffer, &bufoffset);
+ if (top<offstop)
+ //okay to continue going down
+ continue;
+ }
+ } else if (oid==2) {
+ //send prefetch first
+ int objindex=top+2;
+ int machinenum = lhashSearch(dfsList[objindex]);
+ insertPrefetch(machinenum, dfsList[objindex], numoffset-top, &offsetarray[top], &head);
+ }
+ //oid is 0
+ //go backwards until we can increment
+ do {
+ do {
+ top-=2;
+ if (top<0) {
+ // walk finished: push out whatever is still batched
+ flushResponses(sd, buffer, &bufoffset);
+ return head;
+ }
+ } while(dfsList[top+1] == GET_RANGE(offsetarray[top + 3]));
+
+ header=searchObj(dfsList[top]);
+ //header shouldn't be null unless the object moves away, but allow
+ //ourselves the option to just continue on if we lose the object
+ } while(header==NULL);
+ //increment
+ dfsList[top+1]++;
+ }
+ flushResponses(sd, buffer, &bufoffset);
+ return head;
+}
+
+
+/* Look an object up by oid: first in the machine lookup table, then in the
+ * prefetch lookup table. Returns NULL when neither has it. */
+INLINE objheader_t *searchObj(unsigned int oid) {
+  objheader_t *local = (objheader_t *)mhashSearch(oid);
+
+  if (local == NULL)
+    return prehashSearch(oid);
+  return local;
+}
+
+/* Free a chain of perMcPrefetchList_t nodes together with the
+ * objOffsetPile_t list hanging off each of them */
+void proPrefetchQDealloc(perMcPrefetchList_t *node) {
+  while (node != NULL) {
+    perMcPrefetchList_t *following = node->next;
+    objOffsetPile_t *obj = node->list;
+
+    //The offset arrays are not owned by this list, so they are left alone.
+    while (obj != NULL) {
+      objOffsetPile_t *objnext = obj->next;
+      free(obj);
+      obj = objnext;
+    }
+
+    free(node);
+    node = following;
+  }
+}
+
+/* Allocate an objOffsetPile_t describing (oid, numoffset, offsets) and link
+ * it in front of "next". Prints a message and returns NULL on allocation
+ * failure. The offsets array is referenced, not copied. */
+static objOffsetPile_t *newObjOffsetPile(unsigned int oid, short numoffset, short *offsets, objOffsetPile_t *next) {
+  objOffsetPile_t *objnode = malloc(sizeof(objOffsetPile_t));
+
+  if (objnode == NULL) {
+    printf("Malloc error %s %d\n", __FILE__, __LINE__);
+    return NULL;
+  }
+  objnode->offsets = offsets;
+  objnode->oid = oid;
+  objnode->numoffset = numoffset;
+  objnode->next = next;
+  return objnode;
+}
+
+/* Record a prefetch request for (oid, offsets[0..numoffset-1]) on machine mid
+ * in the per-machine list *head.
+ *
+ * The machine list is kept sorted by mid and each machine's object list by
+ * oid, with entries for the same oid ordered by their offset arrays. A
+ * request that is a prefix of an existing entry for the same oid is already
+ * covered and is dropped; a request that extends an existing entry replaces
+ * that entry's offsets. If memory for a new node cannot be allocated the
+ * request is dropped and the list is left unchanged. */
+void insertPrefetch(int mid, unsigned int oid, short numoffset, short *offsets, perMcPrefetchList_t **head) {
+  objOffsetPile_t *objnode;
+  objOffsetPile_t **tmp;
+
+  //NOTE(review): the converted address is never used here; the call is kept
+  //because midtoIP is not visible from this file section
+  char ptr1[50];
+  midtoIP(mid, ptr1);
+
+  //Loop through the machines
+  for(; 1; head=&((*head)->next)) {
+    int tmid;
+    if ((*head)==NULL||(tmid=(*head)->mid)>mid) {
+      //No entry for this machine yet: create one in sorted position
+      perMcPrefetchList_t *mcnode = malloc(sizeof(perMcPrefetchList_t));
+      if (mcnode == NULL) {
+        printf("Malloc error %s %d\n", __FILE__, __LINE__);
+        return;
+      }
+      objnode = newObjOffsetPile(oid, numoffset, offsets, NULL);
+      if (objnode == NULL) {
+        free(mcnode);
+        return;
+      }
+      mcnode->mid = mid;
+      mcnode->list = objnode;
+      mcnode->next = *head;
+      *head = mcnode;
+      return;
+    }
+
+    //keep looking
+    if (tmid < mid)
+      continue;
+
+    //found mid list
+    for(tmp=&((*head)->list); 1; tmp=&((*tmp)->next)) {
+      int toid;
+      int i;
+      int advance = 0;
+
+      if ((*tmp)==NULL||((toid=(*tmp)->oid)>oid)) {
+        //No entry with this oid (or we ran past it): insert in sorted position
+        objnode = newObjOffsetPile(oid, numoffset, offsets, *tmp);
+        if (objnode != NULL)
+          *tmp = objnode;
+        return;
+      }
+      if (toid < oid)
+        continue;
+
+      //Same oid: compare the offset arrays element by element
+      int onumoffset=(*tmp)->numoffset;
+      short * ooffset=(*tmp)->offsets;
+
+      for(i=0; i<numoffset; i++) {
+        //ooffset has onumoffset entries, so index onumoffset is already past
+        //its end: the existing entry is a prefix of the new request
+        if (i>=onumoffset) {
+          //We've matched, let's just extend the current prefetch
+          (*tmp)->numoffset=numoffset;
+          (*tmp)->offsets=offsets;
+          return;
+        }
+        if (ooffset[i]<offsets[i]) {
+          //The new request sorts after this entry: try the next entry
+          advance = 1;
+          break;
+        }
+        if (ooffset[i]>offsets[i]) {
+          //Place item before the current one
+          objnode = newObjOffsetPile(oid, numoffset, offsets, *tmp);
+          if (objnode != NULL)
+            *tmp = objnode;
+          return;
+        }
+      }
+      //if we get to the end, we're already covered by this prefetch
+      if (!advance)
+        return;
+    }
+  }
+}
+
+/* Send one machine's prefetch list over socket sd.
+ * Wire format: a TRANS_PREFETCH control byte, then for every entry
+ *   int numoffset | unsigned int oid | unsigned int mid | numoffset shorts
+ * and finally an int -1 that marks the end of the list. */
+void sendRangePrefetchReq(perMcPrefetchList_t *mcpilenode, int sd, unsigned int mid) {
+  objOffsetPile_t *entry;
+  char control = TRANS_PREFETCH;
+  int endpair = -1;
+
+  /* Control message first */
+  send_data(sd, &control, sizeof(char));
+
+  /* One record per oid/offsets pair */
+  for (entry = mcpilenode->list; entry != NULL; entry = entry->next) {
+    int len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((entry->numoffset) * sizeof(short));
+    char oidnoffset[len];
+    char *cursor = oidnoffset;
+
+    *((int*)cursor) = entry->numoffset;
+    cursor += sizeof(int);
+    *((unsigned int *)cursor) = entry->oid;
+    cursor += sizeof(unsigned int);
+    *((unsigned int *)cursor) = mid;
+    cursor += sizeof(unsigned int);
+    memcpy(cursor, entry->offsets, (entry->numoffset)*sizeof(short));
+    send_data(sd, oidnoffset, len);
+  }
+
+  /* -1 tells the remote machine that no more pairs follow */
+  send_data(sd, &endpair, sizeof(int));
+}
+
+/* Receive one prefetch response from socket sd.
+ * The message is an int total length followed by a control byte. For
+ * OBJECT_FOUND the control byte is followed by an oid and then a sequence of
+ * objects (header + data); these are copied into the prefetch cache and
+ * entered into the prefetch lookup table, replacing an older cached copy when
+ * the received version is at least as new. Waiters on pflookup.cond are woken
+ * afterwards.
+ * Returns 0 on success (including OBJECT_NOT_FOUND and unknown control
+ * values, which are only reported), -1 when the length field is invalid or
+ * prefetch cache memory cannot be obtained. */
+int getRangePrefetchResponse(int sd) {
+  int length = 0;
+  recv_data(sd, &length, sizeof(int));
+  int size = length - sizeof(int);
+  //The length comes off the wire: refuse anything that cannot even hold the
+  //control byte, since it would produce an invalid buffer size below
+  if (size < (int)sizeof(char)) {
+    printf("%s() Error: invalid response length %d in line %d at %s\n",
+           __func__, length, __LINE__, __FILE__);
+    return -1;
+  }
+  char recvbuffer[size];
+  recv_data(sd, recvbuffer, size);
+  char control = *((char *) recvbuffer);
+  unsigned int oid;
+  if(control == OBJECT_FOUND) {
+    size = size - (sizeof(char) + sizeof(unsigned int));
+    if (size < 0) {
+      printf("%s() Error: invalid response length %d in line %d at %s\n",
+             __func__, length, __LINE__, __FILE__);
+      return -1;
+    }
+    pthread_mutex_lock(&prefetchcache_mutex);
+    void *ptr;
+    if((ptr = prefetchobjstrAlloc(size)) == NULL) {
+      printf("%s() Error: objstrAlloc error for copying into prefetch cache in line %d at %s\n",
+             __func__, __LINE__, __FILE__);
+      pthread_mutex_unlock(&prefetchcache_mutex);
+      return -1;
+    }
+    pthread_mutex_unlock(&prefetchcache_mutex);
+
+    memcpy(ptr, recvbuffer + sizeof(char) + sizeof(unsigned int), size);
+
+    //ignore oid value...we'll get it from the object
+
+    while(size>0) {
+      unsigned int objsize;
+      GETSIZE(objsize, ptr);
+      STATUS(ptr)=0;
+      oid=OID(ptr);
+      objsize+=sizeof(objheader_t);
+
+      /* Insert into prefetch hash lookup table */
+      void * oldptr;
+      if((oldptr = prehashSearch(oid)) != NULL) {
+        if(((objheader_t *)oldptr)->version <= ((objheader_t *)ptr)->version) {
+          prehashRemove(oid);
+          prehashInsert(oid, ptr);
+        }
+      } else {
+        prehashInsert(oid, ptr);
+      }
+      //Advance on a char pointer; casting the pointer through unsigned int
+      //would truncate it where pointers are wider than int
+      ptr=(void *)(((char *)ptr)+objsize);
+      size-=objsize;
+    }
+
+    pthread_mutex_lock(&pflookup.lock);
+    pthread_cond_broadcast(&pflookup.cond);
+    pthread_mutex_unlock(&pflookup.lock);
+  } else if(control == OBJECT_NOT_FOUND) {
+    //The oid is decoded but not used any further
+    oid = *((unsigned int *)(recvbuffer + sizeof(char)));
+  } else {
+    printf("%s() Error: in Decoding the control value %d, %s\n", __func__, __LINE__, __FILE__);
+  }
+  return 0;
+}
+
+/* Serve the prefetch requests arriving on acceptfd until the -1 end marker
+ * is read. Each request is int numoffset | oid | mid | numoffset shorts.
+ * Objects found here are answered over a response socket to the requesting
+ * machine (kept until the requester changes or the stream ends); whatever
+ * cannot be served here is forwarded to the machines named by
+ * processRemote(). Always returns 0. */
+int rangePrefetchReq(int acceptfd) {
+  int numoffset;
+  int sd = -1;
+  unsigned int mid = -1;
+  unsigned int baseoid;
+  oidmidpair_t oidmid;
+
+  for (;;) {
+    recv_data(acceptfd, &numoffset, sizeof(int));
+    if (numoffset == -1)
+      break;
+
+    recv_data(acceptfd, &oidmid, 2*sizeof(unsigned int));
+    baseoid = oidmid.oid;
+
+    //Switch the response socket when the requesting machine changes
+    if (oidmid.mid != mid) {
+      if (mid != -1)
+        freeSockWithLock(transPResponseSocketPool, mid, sd);
+      mid = oidmid.mid;
+      sd = getSockWithLock(transPResponseSocketPool, mid);
+    }
+
+    short offsetsarry[numoffset];
+    recv_data(acceptfd, offsetsarry, numoffset*sizeof(short));
+
+    perMcPrefetchList_t *pilehead = processRemote(baseoid, offsetsarry, sd, numoffset);
+
+    if (pilehead != NULL) {
+      perMcPrefetchList_t *pile;
+
+      for (pile = pilehead; pile != NULL; pile = pile->next) {
+        // Forwarding uses its own socket from the shared pool
+        int fwdsd = getSock2(transPrefetchSockPool, pile->mid);
+        sendRangePrefetchReq(pile, fwdsd, mid);
+      }
+
+      proPrefetchQDealloc(pilehead);
+    }
+  }
+
+  //Release socket
+  if (mid != -1)
+    freeSockWithLock(transPResponseSocketPool, mid, sd);
+  return 0;
+}
+
+
+/* Compute the next oid to visit from the object "header" at stack depth top.
+ * offsetarray[top+2] is the start index (arrays) or byte offset (fields) and
+ * offsetarray[top+3] packs range, stride and stride direction (see the GET_*
+ * macros); dfsList[top+1] is the number of steps already taken at this depth.
+ * Returns the oid read from the object, 0 when the computed array index is
+ * out of bounds or the range is exhausted, or 2 when the intermediate object
+ * of a repeated field access cannot be found.
+ * NOTE(review): the value read from the object may itself be 0 (presumably a
+ * null reference); callers treat it like "nothing here" -- confirm. */
+unsigned int getNextOid(objheader_t * header, short * offsetarray, unsigned int *dfsList, int top) {
+ int startindex= offsetarray[top+2];
+ int currcount = dfsList[top+1];
+ int range = GET_RANGE(offsetarray[top + 3]);
+
+ if(TYPE(header) >= NUMCLASSES) {
+ //Array case
+ struct ArrayObject *ao = (struct ArrayObject *) (((char *)header) + sizeof(objheader_t));
+ int stride = GET_STRIDE(offsetarray[top + 3])+1; // encoded stride is one less than the step
+ int length = ao->___length___;
+ int currindex;
+ //Check direction of stride
+ if(GET_STRIDEINC(offsetarray[top + 3])) {
+ //Negative
+ currindex=startindex-stride*currcount;
+ if (currindex<0)
+ return 0;
+
+ //Also have to check whether we will eventually index into array
+ if (currindex>=length) {
+ //Skip to the point that we will index into array
+ int delta=(currindex-length-1)/stride+1; //-1, +1 is to make sure that it rounds up
+ if ((delta+currcount)>range)
+ return 0;
+ currindex-=delta*stride;
+ }
+ } else {
+ //Going positive, compute current index
+ currindex=startindex+stride*currcount;
+ if(currindex >= length)
+ return 0;
+ }
+
+ // element i lives right behind the ArrayObject header, elementsize bytes apart
+ int elementsize = classsize[TYPE(header)];
+ return *((unsigned int *)(((char *)ao) + sizeof(struct ArrayObject) + elementsize*currindex));
+ } else {
+ //handle fields
+
+ // Both operands are 0/1 comparison results, so the bitwise & below behaves like &&
+ if(currcount!=0 & range != 0) {
+ //go to the next offset: continue from the object reached on the previous step
+ header=searchObj(dfsList[top+2]);
+ if (header==NULL)
+ return 2;
+ }
+
+ return *((unsigned int *)(((char *)header) + sizeof(objheader_t) + startindex));
+ }
+}
+
+/* Send whatever is batched in buffer (the first *bufoffset bytes) over sd
+ * and mark the buffer empty; does nothing when the buffer is already empty. */
+void flushResponses(int sd, char * buffer, int * bufoffset) {
+  if (*bufoffset == 0)
+    return;
+
+  send_data(sd, buffer, *bufoffset);
+  *bufoffset = 0;
+}
+
+/* Add the object "header" (header plus data) to the response batch in buffer.
+ * An empty batch (*bufoffset == 0) is first given a message prefix:
+ *   char TRANS_PREFETCH_RESPONSE | int length | char OBJECT_FOUND | unsigned int oid
+ * otherwise the length field of the existing prefix is increased by the
+ * object's size. If the object still fits below PBUFFERSIZE it is only copied
+ * into buffer and *bufoffset is advanced (it is sent by a later call or by
+ * flushResponses). Otherwise the batch and the object are assembled in a
+ * temporary stack buffer, sent immediately, and *bufoffset is reset to 0.
+ * Always returns 0. */
+int sendOidFound(objheader_t * header, unsigned int oid, int sd, char *buffer, int *bufoffset) {
+ int incr;
+ int objsize;
+ GETSIZE(objsize, header);
+ int size = sizeof(objheader_t) + objsize;
+ char *sendbuffer;
+
+ if ((incr=(*bufoffset))==0) {
+ // start a new message; only the first object's oid goes into the prefix
+ buffer[incr] = TRANS_PREFETCH_RESPONSE;
+ incr+=sizeof(char);
+ *((int *)(buffer + incr)) = size+sizeof(int)+sizeof(char)+sizeof(unsigned int);
+ incr += sizeof(int);
+ *((char *)(buffer + incr)) = OBJECT_FOUND;
+ incr += sizeof(char);
+ *((unsigned int *)(buffer + incr)) = oid;
+ incr += sizeof(unsigned int);
+ } else
+ // the length field sits right behind the control byte
+ *((int *)(buffer+sizeof(char)))+=size;
+
+ if ((incr+size)<PBUFFERSIZE) {
+ //don't need to allocate, just copy
+ sendbuffer=buffer;
+ (*bufoffset)=incr+size;
+ } else {
+ // too big for the batch buffer: build the complete message on the stack instead
+ sendbuffer=alloca(size+incr);
+ memcpy(sendbuffer, buffer, incr);
+ *bufoffset=0;
+ }
+
+ memcpy(sendbuffer + incr, header, size);
+ // *bufoffset is 0 here only on the overflow path above, i.e. send right away
+ if ((*bufoffset)==0)
+ send_data(sd, sendbuffer, size+incr);
+ return 0;
+}
+
+/* Tell the requester on socket sd that object oid was not found.
+ * Message layout: int size | char OBJECT_NOT_FOUND | unsigned int oid,
+ * handed to sendPrefetchResponse() together with the
+ * TRANS_PREFETCH_RESPONSE control byte. Always returns 0. */
+int sendOidNotFound(unsigned int oid, int sd) {
+  int size = sizeof(int) + sizeof(char) + sizeof(unsigned int);
+  char sendbuffer[size];
+
+  memcpy(sendbuffer, &size, sizeof(int));
+  sendbuffer[sizeof(int)] = OBJECT_NOT_FOUND;
+  //The oid goes directly behind the one-byte status. Skipping
+  //sizeof(unsigned int) here instead would write past the end of sendbuffer
+  //and miss the position where getRangePrefetchResponse() reads the oid.
+  //memcpy is used because that position is not int-aligned.
+  memcpy(sendbuffer + sizeof(int) + sizeof(char), &oid, sizeof(unsigned int));
+
+  char control = TRANS_PREFETCH_RESPONSE;
+  sendPrefetchResponse(sd, &control, sendbuffer, &size);
+  return 0;
+}
--- /dev/null
+#ifndef _PREFETCH_H_
+#define _PREFETCH_H_
+#include "queue.h"
+#include "dstm.h"
+
+/* Decoding of a packed 16-bit range/stride word: bits 0-11 hold the range,
+ * bits 12-14 the stride field, bit 15 the stride direction flag.
+ * Arguments are parenthesized so compound expressions decode correctly. */
+#define GET_STRIDE(x) (((x) & 0x7000) >> 12)
+#define GET_RANGE(x) ((x) & 0x0fff)
+#define GET_STRIDEINC(x) (((x) & 0x8000) >> 15)
+/* Accessors for a queued prefetch request laid out as
+ * oid | short numoffset | offsets..., where x points to its first byte */
+#define GET_OID(x) ((int *) (x))
+#define GET_NUM_OFFSETS(x) ((short *) ((x) + sizeof(unsigned int)))
+#define GET_OFFSETS(x) ((short *) ((x) + sizeof(unsigned int) + sizeof(short)))
+
+#define INLINE inline __attribute__((always_inline))
+
+
+/****** Global structure **********/
+/* One prefetch entry: an object id plus the offsets to follow from it */
+typedef struct objOffsetPile {
+ unsigned int oid; // id of the object the offsets start from
+ short numoffset; // number of entries in offsets
+ short *offsets; // offset array; not owned by this node (proPrefetchQDealloc does not free it)
+ struct objOffsetPile *next; // next entry for the same machine, NULL at the end
+} objOffsetPile_t;
+
+/* Prefetch entries grouped by the machine they have to be requested from */
+typedef struct perMcPrefetchList {
+ unsigned int mid; // machine id the entries in list are sent to
+ objOffsetPile_t *list; // entries for this machine; freed by proPrefetchQDealloc
+ struct perMcPrefetchList *next; // next machine, NULL at the end
+} perMcPrefetchList_t;
+
+/* Queue of per-machine prefetch lists with its own lock and condition
+ * variable. NOTE(review): no code in this file section initialises or uses
+ * these fields -- confirm whether the type is still needed. */
+typedef struct proPrefetchQ {
+ perMcPrefetchList_t *front, *rear; // ends of the queue
+ pthread_mutex_t qlock; // queue lock
+ pthread_mutexattr_t qlockattr; // attribute object for qlock
+ pthread_cond_t qcond; // condition variable associated with the queue
+} proPrefetchQ_t;
+
+/* Pairs an oid with a depth value */
+typedef struct oidAtDepth {
+ int depth; //TODO Remove may not need since depth is never read
+ unsigned int oid; // object id
+} oidAtDepth_t;
+
+// Global Prefetch Processing Queue
+proPrefetchQ_t prefetchQ;
+
+/**** Prefetch Queue to be processed functions ******/
+void proPrefetchQDealloc(perMcPrefetchList_t *);
+
+/******** Process Queue Element functions ***********/
+void rangePrefetch(unsigned int, short, short *);
+void *transPrefetchNew();
+perMcPrefetchList_t* processLocal(char *ptr);
+perMcPrefetchList_t *processRemote(unsigned int oid, short * offsetarray, int sd, short numoffset);
+void insertPrefetch(int, unsigned int, short, short*, perMcPrefetchList_t **);
+
+/******** Sending and Receiving Prefetches *******/
+void sendRangePrefetchReq(perMcPrefetchList_t *, int sd, unsigned int mid);
+int rangePrefetchReq(int acceptfd);
+int processOidFound(objheader_t *, short *, int, int, int);
+int getRangePrefetchResponse(int sd);
+INLINE objheader_t *searchObj(unsigned int);
+
+
+/*********** Functions for computation at the participant end **********/
+unsigned int getNextOid(objheader_t * header, short * offsetarray, unsigned int *dfsList, int top);
+int sendOidFound(objheader_t *, unsigned int, int, char *buffer, int *bufoffset);
+int sendOidNotFound(unsigned int oid, int sd);
+void flushResponses(int sd, char * buffer, int * bufoffset);
+
+#endif
--- /dev/null
+/* LOCK THE ENTIRE HASH TABLE */
+#include "prelookup.h"
+#include "gCollect.h"
+extern objstr_t *prefetchcache;
+extern pthread_mutex_t prefetchcache_mutex; //Mutex to lock Prefetch Cache
+extern prefetchNodeInfo_t pNodeInfo;
+
+prehashtable_t pflookup; //Global prefetch cache table
+
+/*
+ * prehashCreate
+ * Allocates the bin array of the global prefetch lookup table (pflookup)
+ * and initializes its bookkeeping fields, its recursive mutex and its
+ * condition variable.
+ *   size       - number of bins to allocate
+ *   loadfactor - fraction of size used as the resize threshold
+ * Returns 0 on success, 1 if the bin array cannot be allocated.
+ */
+unsigned int prehashCreate(unsigned int size, float loadfactor) {
+  prehashlistnode_t *bins = calloc(size, sizeof(prehashlistnode_t));
+
+  // Without a bin array there is nothing to set up
+  if (bins == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  pflookup.table = bins;
+  pflookup.size = size;
+  pflookup.numelements = 0;           // table starts out empty
+  pflookup.loadfactor = loadfactor;
+  pflookup.threshold = loadfactor*size;
+  pflookup.mask = (size << 1) -1;     // paired with the >>1 done when hashing
+
+  // Initialize and set the prefetch table mutex attribute.
+  // NOTE:PTHREAD_MUTEX_RECURSIVE is currently inside a #if_def UNIX98 in the pthread.h file
+  // Therefore use PTHREAD_MUTEX_RECURSIVE_NP instead
+  pthread_mutexattr_init(&pflookup.prefetchmutexattr);
+  pthread_mutexattr_settype(&pflookup.prefetchmutexattr, PTHREAD_MUTEX_RECURSIVE_NP);
+
+  // Table lock and condition variable
+  pthread_mutex_init(&pflookup.lock, &pflookup.prefetchmutexattr);
+  pthread_cond_init(&pflookup.cond, NULL);
+
+  return 0;
+}
+
+// Map a key to its bin index: mask the key, then drop the lowest bit
+unsigned int prehashFunction(unsigned int key) {
+  unsigned int masked = key & pflookup.mask;
+  return masked >> 1;
+}
+
+/*
+ * prehashInsert
+ * Stores (key, val) in the global prefetch lookup table.
+ *   key - oid used as the hash key; 0 marks an empty bin, so it is not a
+ *         usable key
+ *   val - pointer stored for the key
+ * Grows the table first when the element count exceeds the threshold. The
+ * entry goes into the bin head when the bin is empty, otherwise into a new
+ * node linked right after the head. No duplicate check is made.
+ * If a chain node cannot be allocated the error is reported and the table
+ * is left unchanged.
+ */
+void prehashInsert(unsigned int key, void *val) {
+  prehashlistnode_t *bin;
+  pthread_mutex_lock(&pflookup.lock);
+
+  if(pflookup.numelements > (pflookup.threshold)) {
+    // Resize. A failed resize leaves the current table untouched, so the
+    // insert below still works on the old table.
+    unsigned int newsize = pflookup.size << 1;
+    prehashResize(newsize);
+  }
+
+  // Look the bin up only after a possible resize replaced table and mask
+  bin = &pflookup.table[(key & pflookup.mask)>>1];
+
+  if(bin->key==0) {
+    bin->key = key;
+    bin->val = val;
+  } else { // Insert at the beginning of the linked list
+    prehashlistnode_t * node = calloc(1, sizeof(prehashlistnode_t));
+    if(node == NULL) {
+      printf("Calloc error %s %d\n", __FILE__, __LINE__);
+      pthread_mutex_unlock(&pflookup.lock);
+      return;
+    }
+    node->key = key;
+    node->val = val;
+    node->next = bin->next;
+    bin->next = node;
+  }
+
+  // Count the element only once it is really stored
+  pflookup.numelements++;
+  pthread_mutex_unlock(&pflookup.lock);
+}
+
+// Look up the value stored for an oid.
+// Walks the bin selected by the key under the table lock and returns the
+// stored val of the first entry whose key matches, or NULL if none does.
+void *prehashSearch(unsigned int key) {
+  prehashlistnode_t *cursor;
+  void *found = NULL;
+
+  pthread_mutex_lock(&pflookup.lock);
+  for(cursor = &pflookup.table[(key & pflookup.mask)>>1]; cursor != NULL; cursor = cursor->next) {
+    if(cursor->key == key) {
+      found = cursor->val;
+      break;
+    }
+  }
+  pthread_mutex_unlock(&pflookup.lock);
+  return found;
+}
+
+/*
+ * prehashRemove
+ * Removes the first entry matching key from the global prefetch lookup table.
+ * Returns 0 if an entry was removed, 1 if the key was not found.
+ */
+unsigned int prehashRemove(unsigned int key) {
+  prehashlistnode_t *head, *victim, *before;
+
+  pthread_mutex_lock(&pflookup.lock);
+  head = &pflookup.table[prehashFunction(key)];
+
+  // The bin head lives inside the table array and is never freed itself
+  if (head->key == key) {
+    pflookup.numelements--;
+    victim = head->next;
+    if (victim == NULL) {
+      // Lone entry: mark the bin as empty
+      head->key = 0;
+      head->val = NULL;
+    } else {
+      // Pull the first chained node up into the head slot and free that node
+      head->key = victim->key;
+      head->val = victim->val;
+      head->next = victim->next;
+      free(victim);
+    }
+    pthread_mutex_unlock(&pflookup.lock);
+    return 0;
+  }
+
+  // Otherwise scan the chained nodes and unlink the first match
+  for (before = head, victim = head->next; victim != NULL; before = victim, victim = victim->next) {
+    if (victim->key == key) {
+      pflookup.numelements--;
+      before->next = victim->next;
+      free(victim);
+      pthread_mutex_unlock(&pflookup.lock);
+      return 0;
+    }
+  }
+
+  pthread_mutex_unlock(&pflookup.lock);
+  return 1;
+}
+
+/*
+ * prehashResize
+ * Replaces the bin array of pflookup with a freshly allocated one of
+ * newsize bins and rehashes every entry of the old array into it.
+ * Returns 0 on success; returns 1 without touching pflookup if the new
+ * array cannot be allocated.
+ * This function does not take pflookup.lock itself; prehashInsert calls it
+ * while holding the lock.
+ */
+unsigned int prehashResize(unsigned int newsize) {
+ prehashlistnode_t *node, *ptr; // node: new bin array, ptr: old bin array
+ unsigned int oldsize;
+ int i,index;
+ unsigned int mask;
+ ptr = pflookup.table;
+ oldsize = pflookup.size;
+
+ if((node = calloc(newsize, sizeof(prehashlistnode_t))) == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+
+ pflookup.table = node; //Update the global hashtable upon resize()
+ pflookup.size = newsize;
+ pflookup.threshold=newsize*pflookup.loadfactor;
+ mask=pflookup.mask = (newsize << 1) -1;
+
+ for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+ prehashlistnode_t * curr = &ptr[i];
+ prehashlistnode_t *tmp, *next;
+ int isfirst = 1; // 1 while curr is the bin head, which lives inside the old array
+ do {
+ unsigned int key;
+ if ((key=curr->key) == 0) { //Exit the inner loop when the entry is empty
+ break; //key = val = 0 for an element that is not present in the hash table
+ }
+ next = curr->next; // saved first: curr may be freed or relinked below
+ index = (key & mask)>>1;
+ tmp=&pflookup.table[index];
+ // Insert into the new table
+ if(tmp->key==0) {
+ // Target bin is empty: copy into its head slot
+ tmp->key=curr->key;
+ tmp->val=curr->val;
+ // Only chained nodes are individually allocated; the head is freed with the old array
+ if (!isfirst)
+ free(curr);
+ } /*
+ NOTE: Add this case if you change this...
+ This case currently never happens because of the way things rehash....
+else if (isfirst) {
+ prehashlistnode_t * newnode = calloc(1, sizeof(prehashlistnode_t));
+ newnode->key = curr->key;
+ newnode->val = curr->val;
+ newnode->next = tmp->next;
+ tmp->next=newnode;
+ } */
+ else {
+ // Target bin is occupied: reuse the chained node by linking it after the new head
+ curr->next=tmp->next;
+ tmp->next=curr;
+ }
+
+ isfirst = 0;
+ curr = next;
+ } while(curr!=NULL);
+ }
+
+ free(ptr); //Free the memory of the old hash table
+ return 0;
+}
+
+/*
+ * prehashClear
+ * Empties the global prefetch lookup table and retires the current prefetch
+ * object stores to the stale lists (CACHE builds only; otherwise a no-op).
+ *
+ * Every bin is emptied: chained nodes are freed and the head slot, which
+ * lives inside the table array, has its key and val reset. This includes the
+ * head of a bin that had chained nodes, and the element count goes back to
+ * zero, so no entry keeps pointing at an object store that is handed to the
+ * stale lists below.
+ */
+void prehashClear() {
+#ifdef CACHE
+  unsigned int i;
+  prehashlistnode_t *bin, *chained;
+
+  pthread_mutex_lock(&pflookup.lock);
+  for(i = 0; i < pflookup.size; i++) {
+    bin = &pflookup.table[i];
+    // Free the chained nodes behind the head
+    while((chained = bin->next) != NULL) {
+      bin->next = chained->next;
+      free(chained);
+    }
+    // Mark the head slot empty (key 0 is the "unused" marker)
+    bin->key = 0;
+    bin->val = NULL;
+  }
+  pflookup.numelements = 0;
+  {
+    int stale;
+    pthread_mutex_unlock(&pflookup.lock);
+    pthread_mutex_lock(&prefetchcache_mutex);
+    if (pNodeInfo.newstale==NULL) {
+      //transfer the list wholesale;
+      pNodeInfo.oldstale=pNodeInfo.oldptr;
+      pNodeInfo.newstale=pNodeInfo.newptr;
+    } else {
+      //merge the two lists
+      pNodeInfo.newstale->prev=pNodeInfo.oldptr;
+      pNodeInfo.newstale=pNodeInfo.newptr;
+    }
+    stale=STALL_THRESHOLD-pNodeInfo.stale_count;
+
+    if (stale>0&&stale>pNodeInfo.stall)
+      pNodeInfo.stall=stale;
+
+    // Start a fresh object store for subsequent prefetches
+    pNodeInfo.stale_count+=pNodeInfo.os_count;
+    pNodeInfo.oldptr=getObjStr(DEFAULT_OBJ_STORE_SIZE);
+    pNodeInfo.newptr=pNodeInfo.oldptr;
+    pNodeInfo.os_count=1;
+    pthread_mutex_unlock(&prefetchcache_mutex);
+  }
+#endif
+}
+
--- /dev/null
+#ifndef _PRELOOKUP_H_
+#define _PRELOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include "dstm.h"
+
+#define PLOADFACTOR 0.25
+#define PHASH_SIZE 1024
+
+/* One hash entry: used both as a bin head inside the table array and as a
+ * separately allocated node chained behind a head through next. */
+typedef struct prehashlistnode {
+ unsigned int key; // 0 marks an unused bin head
+ void *val; //this can be cast to another type or used to point to a larger structure
+ struct prehashlistnode *next;
+} prehashlistnode_t;
+
+struct objstr;
+
+/* Bookkeeping for the prefetch lookup hash table. */
+typedef struct prehashtable {
+ prehashlistnode_t *table; // points to beginning of hash table
+ unsigned int size; // number of bins in table
+ unsigned int mask; // (size << 1) - 1; applied to a key before shifting right by one
+ unsigned int numelements; // count of stored entries
+ unsigned int threshold; // loadfactor * size; exceeding it triggers a resize on insert
+ double loadfactor;
+ pthread_mutex_t lock; // guards the table; created with a recursive mutex attribute
+ pthread_mutexattr_t prefetchmutexattr;
+ pthread_cond_t cond;
+ struct objstr *hack2; // NOTE(review): purpose not visible from this file -- confirm
+ struct objstr *hack; // NOTE(review): purpose not visible from this file -- confirm
+} prehashtable_t;
+
+/* Prototypes for hash*/
+unsigned int prehashCreate(unsigned int size, float loadfactor); //returns 0 on success, 1 on allocation failure
+unsigned int prehashFunction(unsigned int key); //returns the bin index for key
+void prehashInsert(unsigned int key, void *val);
+void *prehashSearch(unsigned int key); //returns val, NULL if not found
+unsigned int prehashRemove(unsigned int key); //returns 0 if removed, 1 if not found
+unsigned int prehashResize(unsigned int newsize); //returns 0 on success, 1 on allocation failure
+void prehashClear();
+/* end hash */
+
+#endif
+
--- /dev/null
+#include "queue.h"
+
+volatile int headoffset, tailoffset;
+char * memory;
+pthread_mutex_t qlock;
+pthread_mutexattr_t qlockattr;
+pthread_cond_t qcond;
+
+#define QSIZE 2048 //2 KB
+
+/* Set up the primary queue: its recursive lock, its condition variable,
+ * the backing buffer and the (empty) head/tail positions. */
+void queueInit(void) {
+  /* Synchronization objects used by the queue */
+  pthread_mutexattr_init(&qlockattr);
+  pthread_mutexattr_settype(&qlockattr, PTHREAD_MUTEX_RECURSIVE_NP);
+  pthread_mutex_init(&qlock, &qlockattr);
+  pthread_cond_init(&qcond, NULL);
+
+  /* Backing store: QSIZE bytes plus room for a trailing -1 wrap marker */
+  memory=malloc(QSIZE+sizeof(int));
+
+  /* Head and tail at the same offset means the queue is empty */
+  headoffset=0;
+  tailoffset=0;
+}
+
+/*
+ * getmemory
+ * Reserves room for a record of size payload bytes at the head of the ring
+ * buffer. Each record is stored as an int length prefix (payload size plus
+ * the prefix itself) followed by the payload.
+ * Returns a pointer to the payload area, or NULL (after signalling qcond)
+ * when the record would run into the unread region starting at tailoffset.
+ * headoffset is not advanced here; movehead() does that.
+ */
+void * getmemory(int size) {
+ int tmpoffset=headoffset+size+sizeof(int);
+ if (tmpoffset>QSIZE) {
+ // Record does not fit before the end of the buffer: place it at the start
+ //Wait for tail to go past end
+ tmpoffset=size+sizeof(int);
+ if (headoffset<tailoffset) {
+ pthread_cond_signal(&qcond); //wake the other thread up
+ return NULL;
+ }
+ //Wait for tail to go past new start
+ if (tailoffset<=tmpoffset) {
+ pthread_cond_signal(&qcond); //wake the other thread up
+ return NULL;
+ }
+ // -1 in the length slot at the old head tells the reader to wrap to offset 0
+ *((int *)(memory+headoffset))=-1; //safe because we left space
+ *((int*)memory)=size+sizeof(int);
+ return memory+sizeof(int);
+ } else {
+ // Record fits before the end; refuse if it would reach the tail
+ if (headoffset<tailoffset&&tailoffset<=tmpoffset) {
+ pthread_cond_signal(&qcond); //wake the other thread up
+ return NULL;
+ }
+ *((int*)(memory+headoffset))=size+sizeof(int);
+ return memory+headoffset+sizeof(int);
+ }
+}
+
+/* Advance the head past a record of size payload bytes (plus its int length
+ * prefix), restarting from the beginning of the buffer when the record would
+ * end beyond QSIZE, then signal qcond. */
+void movehead(int size) {
+  int advanced=headoffset+size+sizeof(int);
+  if (advanced>QSIZE) {
+    //the record was placed at the start of the buffer
+    advanced=size+sizeof(int);
+  }
+  headoffset=advanced;
+  pthread_cond_signal(&qcond); //wake the other thread up
+}
+
+/*
+ * gettail
+ * Returns a pointer to the payload of the record at the tail of the ring
+ * buffer, waiting on qcond while the queue is empty (tailoffset ==
+ * headoffset). tailoffset is not advanced past the record here; inctail()
+ * does that.
+ * NOTE(review): movehead() updates headoffset and signals qcond without
+ * holding qlock, so a signal may arrive between the re-check below and
+ * pthread_cond_wait() -- confirm this cannot stall the reader.
+ */
+void * gettail() {
+ while(tailoffset==headoffset) {
+ //Sleep
+ pthread_mutex_lock(&qlock);
+ // re-check under the lock before blocking
+ if (tailoffset==headoffset)
+ pthread_cond_wait(&qcond, &qlock);
+ pthread_mutex_unlock(&qlock);
+ }
+ // A length slot of -1 is the wrap marker: the next record starts at offset 0
+ if (*((int *)(memory+tailoffset))==-1) {
+ tailoffset=0; //do loop
+ }
+
+ return memory+tailoffset+sizeof(int);
+}
+
+/* Step the tail over the record it currently points at, using the length
+ * stored in the record's prefix; go back to offset 0 when the result lies
+ * beyond QSIZE. */
+void inctail() {
+  int next=tailoffset+*((int *)(memory+tailoffset));
+  tailoffset=(next>QSIZE) ? 0 : next;
+}
+
+/* Release the queue's backing buffer. The pointer is cleared afterwards so
+ * that a repeated call is harmless (free(NULL) is a no-op) and no dangling
+ * pointer is left behind. */
+void predealloc() {
+  free(memory);
+  memory=NULL;
+}
+
--- /dev/null
+#ifndef _QUEUE_H_
+#define _QUEUE_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+#include "dstm.h"
+
+void queueInit(void); // allocate the buffer and initialize lock, condition variable and offsets
+void * getmemory(int size); // reserve room for a size-byte payload at the head; NULL if it would reach the tail
+void movehead(int size); // advance the head past a size-byte payload and signal the condition variable
+void * gettail(); // wait until the queue is non-empty and return the payload at the tail
+void inctail(); // advance the tail past the record it points at
+void predealloc(); // free the buffer
+#endif
--- /dev/null
+#include "dstm.h"
+#include "addPrefetchEnhance.h"
+#include <signal.h>
+#include <fcntl.h>
+
+extern int numTransAbort;
+extern int numTransCommit;
+extern int nchashSearch;
+extern int nmhashSearch;
+extern int nprehashSearch;
+extern int nRemoteSend;
+extern int nSoftAbort;
+extern int bytesSent;
+extern int bytesRecv;
+extern int totalObjSize;
+extern unsigned int myIpAddr;
+
+void handle();
+extern pfcstats_t *evalPrefetch;
+
+/*
+ * transStatsHandler
+ * Signal handler (TRANSSTATS builds only; otherwise empty) that appends the
+ * transaction counters to /tmp/client_stats.txt and then terminates the
+ * process: exit(0) after writing, exit(-1) if the file cannot be opened.
+ * The sig, info and context arguments are not used.
+ * NOTE(review): fopen/fprintf/exit are not async-signal-safe -- confirm this
+ * is acceptable for a handler that always ends the process.
+ */
+void transStatsHandler(int sig, siginfo_t* info, void *context) {
+#ifdef TRANSSTATS
+ FILE *fp;
+ if ((fp = fopen("/tmp/client_stats.txt", "a+")) == NULL) {
+ exit(-1);
+ }
+ fprintf(fp, "****** Transaction Stats ******\n");
+ fprintf(fp, "myIpAddr = %x\n", myIpAddr);
+ fprintf(fp, "numTransAbort = %d\n", numTransAbort);
+ fprintf(fp, "numTransCommit = %d\n", numTransCommit);
+ fprintf(fp, "nchashSearch = %d\n", nchashSearch);
+ fprintf(fp, "nmhashSearch = %d\n", nmhashSearch);
+ fprintf(fp, "nprehashSearch = %d\n", nprehashSearch);
+ fprintf(fp, "nRemoteReadSend = %d\n", nRemoteSend);
+ fprintf(fp, "nSoftAbort = %d\n", nSoftAbort);
+ fprintf(fp, "bytesSent = %d\n", bytesSent);
+ fprintf(fp, "bytesRecv = %d\n", bytesRecv);
+ fprintf(fp, "totalObjSize= %d\n", totalObjSize);
+ fprintf(fp, "**********************************\n");
+ fflush(fp);
+ fclose(fp);
+ exit(0);
+#endif
+}
+
+/*
+ * handle
+ * Installs transStatsHandler as the SIGUSR1 handler (TRANSSTATS builds only;
+ * otherwise a no-op).
+ * The handler has the three-argument sa_sigaction signature, so SA_SIGINFO
+ * must stay set in sa_flags. The struct is zero-initialized so that no
+ * field reaches sigaction() uninitialized.
+ */
+void handle() {
+#ifdef TRANSSTATS
+  struct sigaction siga = { .sa_flags = SA_SIGINFO };
+  siga.sa_sigaction = &transStatsHandler;
+  sigemptyset(&siga.sa_mask);
+  if (sigaction(SIGUSR1, &siga, NULL) != 0) {
+    perror("sigaction(SIGUSR1)");
+  }
+#endif
+}
+/*
+
+ double getMax(double *array, int size) {
+ int i;
+ double max = array[0];
+ for(i = 0; i < size; i++) { // for 2 MCS
+ if(max <= array[i])
+ max = array[i];
+ }
+ return max;
+ }
+
+ double getMin(double *array, int size) {
+ int i;
+ double min = array[0];
+ for(i = 0; i < size; i++) { //for 2 MCs
+ if(min > array[i])
+ min = array[i];
+ }
+ return min;
+ }
+
+ int getthreadid() {
+ int val;
+ if(((128<<24)|(195<<16)|(175<<8)|84) == myIpAddr)
+ val = 0;
+ else if(((128<<24)|(195<<16)|(175<<8)|86) == myIpAddr)
+ val = 1;
+ else if(((128<<24)|(195<<16)|(175<<8)|87) == myIpAddr)
+ val = 2;
+ else if(((128<<24)|(195<<16)|(175<<8)|88) == myIpAddr)
+ val = 3;
+ else
+ val = 4;
+ printf("threadid/mid = %d\n", val);
+ return val;
+ }
+
+ double getfast(int siteid, int threadid) {
+ int i, j, k;
+ double fast = 0.0;
+ //for(i = 0; i < 2; i++) { // for 2 MC
+ for(i = 0; i < 5; i++) { // for 5 MC
+ if(i == threadid)
+ continue;
+ for(k= 0; k<countstats[i]; k++) {
+ if(fast < threadstats[i][siteid][k])
+ fast = threadstats[i][siteid][k];
+ }
+ }
+ return fast;
+ }
+
+ void sortascending() {
+ int i;
+ for(i = 0 ; i < 5; i++) {
+
+ }
+ }
+
+ void bubblesort() {
+ const int size = 5; // 5MCS
+ int siteid;
+ for(siteid = 0; siteid < 15; siteid++) {
+ int k;
+ for(k=0; k<counttransCommit; k++) {
+ int i;
+ for(i=0; i < size-1; i++) {
+ int j;
+ for(j=0; j < size-1-i; j++) {
+ if(threadstats[j][siteid][k] > threadstats[j+1][siteid][k]) {
+ double temp;
+ temp = threadstats[j+1][siteid][k];
+ threadstats[j+1][siteid][k] = threadstats[j][siteid][k];
+ threadstats[j][siteid][k] = temp;
+ }
+ }
+ } //end of sorting
+ } // end for each transaction
+ } // end for each siteid
+ }
+
+ double avgofthreads(int siteid, int threadid) {
+ double total = 0.0;
+ int k;
+ for(k = 0; k<counttransCommit; k++) {
+ total += threadstats[threadid][siteid][k];
+ }
+ double avg = 0.0;
+ avg = total/counttransCommit;
+ return avg;
+ }
+
+ double avgfast(int siteid, int threadid) {
+ int i, j, k;
+ double fast;
+ for(k = 0; k<counttransCommit; k++) {
+ fast = 0.0;
+ for(i = 0; i <5; i++) { //for 5 mC
+ if(i == threadid)
+ continue;
+ if(fast < threadstats[i][siteid][k]) {
+ fast = threadstats[i][siteid][k];
+ }
+ }
+ avgfasttime[k] = fast;
+ }
+ double total= 0.0;
+ for(k = 0; k<counttransCommit; k++) {
+ total += avgfasttime[k];
+ }
+ return (total/counttransCommit);
+ }
+
+ double avgslow(int siteid, int threadid) {
+ int i, j, k;
+ double slow;
+ for(k = 0; k<counttransCommit; k++) {
+ slow = 1.0;
+ for(i = 0; i < 2; i++) { //for 2 mC
+ if(i == threadid)
+ continue;
+ if(slow > threadstats[i][siteid][k]) {
+ slow = threadstats[i][siteid][k];
+ }
+ }
+ avgslowtime[k] = slow;
+ }
+ double total= 0.0;
+ for(k = 0; k<counttransCommit; k++) {
+ total += avgslowtime[k];
+ }
+ return (total/counttransCommit);
+ }
+
+ double getslowest(int siteid, int threadid) {
+ int i, j, k;
+ double slow = 1.0;
+ //for(i = 0; i < 2; i++) { // for 2 MC
+ for(i = 0; i < 5; i++) { // for 5 MC
+ if(i == threadid)
+ continue;
+ for(k= 0; k<countstats[i]; k++) {
+ if(slow > threadstats[i][siteid][k]) {
+ slow = threadstats[i][siteid][k];
+ }
+ }
+ }
+ return slow;
+ }
+
+ double getavg(int siteid, int threadid) {
+ double total=0.0;
+ int i, j, k;
+ int totalcount = 0;
+ //for(i = 0; i < 2; i++) { //for 2 MC
+ for(i = 0; i < 5; i++) { //for 5 MC
+ if(i == threadid)
+ continue;
+ for(k= 0; k<countstats[i]; k++) {
+ total += threadstats[i][siteid][k];
+ }
+ totalcount +=countstats[i];
+ }
+ double avg = total/totalcount;
+ return avg;
+ }
+
+ double getavgperthd(int siteid, int threadid) {
+ double total=0.0;
+ int i, j, k;
+ for(k= 0; k<countstats[threadid]; k++) {
+ total += threadstats[threadid][siteid][k];
+ }
+ double avg = total/countstats[threadid];
+ return avg;
+ }
+ */
--- /dev/null
+/* ============================================================
+ * singleTMCommit.c
+ * - single thread commit on local machine
+ * =============================================================
+ * Copyright (c) 2009, University of California, Irvine, USA.
+ * All rights reserved.
+ * Author: Alokika Dash
+ * adash@uci.edu
+ * =============================================================
+ *
+ */
+
+#include "tm.h"
+#include "garbage.h"
+#define likely(x) x
+/* Per thread transaction variables */
+__thread objstr_t *t_cache;
+__thread objstr_t *t_reserve;
+__thread struct objlist * newobjs;
+
+#ifdef DELAYCOMP
+#include "delaycomp.h"
+__thread struct pointerlist ptrstack;
+__thread struct primitivelist primstack;
+__thread struct branchlist branchstack;
+struct pointerlist *c_ptrstack;
+struct primitivelist *c_primstack;
+struct branchlist *c_branchstack;
+#endif
+
+#ifdef TRANSSTATS
+int numTransCommit = 0;
+int numTransAbort = 0;
+int nSoftAbort = 0;
+int nSoftAbortCommit = 0;
+int nSoftAbortAbort = 0;
+#endif
+
+#ifdef STMSTATS
+/* Thread variable for locking/unlocking */
+__thread threadrec_t *trec;
+__thread struct objlist * lockedobjs;
+/** Global lock **/
+int typesCausingAbort[TOTALNUMCLASSANDARRAY];
+/******Keep track of objects and types causing aborts******/
+/* TODO uncomment for later use
+#define DEBUGSTMSTAT(args...) { \
+ printf(args); \
+ fflush(stdout); \
+}
+*/
+#define DEBUGSTMSTAT(args...)
+#else
+#define DEBUGSTMSTAT(args...)
+#endif
+
+#ifdef STMDEBUG
+#define DEBUGSTM(x...) printf(x);
+#else
+#define DEBUGSTM(x...)
+#endif
+
+#ifdef FASTMEMCPY
+void * A_memcpy (void * dest, const void * src, size_t count);
+#else
+#define A_memcpy memcpy
+#endif
+
+extern void * curr_heapbase;
+extern void * curr_heapptr;
+extern void * curr_heaptop;
+
+#ifdef STMSTATS
+/*** Global variables *****/
+objlockstate_t *objlockscope;
+/**
+ * ABORTCOUNT
+ * params: object header
+ * Increments the abort count of the object. Once the count exceeds
+ * MAXABORTS and the object is not yet flagged risky, the object is given
+ * its own mutex (x->objlock) and riskyflag is set to 1.
+ * The mutex slot is taken from the current objlockscope chunk while holding
+ * lockedobjstore; a new chunk is allocated and pushed in front when the
+ * current one has no slot left.
+ **/
+void ABORTCOUNT(objheader_t * x) {
+ x->abortCount++;
+ if (x->abortCount > MAXABORTS && (x->riskyflag != 1)) {
+ //makes riskflag sticky
+ pthread_mutex_lock(&lockedobjstore);
+ if (objlockscope->offset<MAXOBJLIST) {
+ x->objlock=&(objlockscope->lock[objlockscope->offset++]);
+ } else {
+ // NOTE(review): the malloc result is used without a NULL check
+ objlockstate_t *tmp=malloc(sizeof(objlockstate_t));
+ tmp->next=objlockscope;
+ tmp->offset=1;
+ x->objlock=&(tmp->lock[0]);
+ objlockscope=tmp;
+ }
+ pthread_mutex_unlock(&lockedobjstore);
+ // the lock is initialized before the flag that announces it is set
+ pthread_mutex_init(x->objlock, NULL);
+ //should put a memory barrier here
+ x->riskyflag = 1;
+ }
+}
+#endif
+
+/* ==================================================
+ * stmStartup
+ * This function starts up the transaction runtime.
+ * It currently performs no work and always returns 0.
+ * ==================================================
+ */
+int stmStartup() {
+ return 0;
+}
+
+/* ======================================
+ * objstrCreate
+ * - allocates a zeroed object store with
+ *   size bytes of space after the header
+ * - returns NULL (after printing an error)
+ *   when the allocation fails
+ * ======================================
+ */
+objstr_t *objstrCreate(unsigned int size) {
+  objstr_t *store = calloc(1, (sizeof(objstr_t) + size));
+
+  if (store == NULL) {
+    printf("%s() Calloc error at line %d, %s\n", __func__, __LINE__, __FILE__);
+    return NULL;
+  }
+
+  store->top = store + 1; //first free byte sits right behind the objstr_t header
+  store->next = NULL;
+  store->size = size;
+  return store;
+}
+
+/* Move every store of the thread's cache list except the last one onto the
+ * reserve list, then mark the remaining store as empty. */
+void objstrReset() {
+  objstr_t *following;
+
+  for (following=t_cache->next; following!=NULL; following=t_cache->next) {
+    //push the current front store onto the reserve list
+    t_cache->next=t_reserve;
+    t_reserve=t_cache;
+    t_cache=following;
+  }
+  //rewind the allocation pointer to just behind the store header
+  t_cache->top=t_cache+1;
+}
+
+//release every object store on the list that begins at store
+void objstrDelete(objstr_t *store) {
+  objstr_t *following;
+
+  for (; store != NULL; store = following) {
+    following = store->next; //read the link before the node is freed
+    free(store);
+  }
+}
+
+/* =================================================
+ * transStart
+ * This function initializes things required in the
+ * transaction start.
+ * In DELAYCOMP builds it points the shared c_ptrstack,
+ * c_primstack and c_branchstack pointers at the calling
+ * thread's stacks; otherwise it does nothing.
+ * =================================================
+ */
+void transStart() {
+ //Transaction start is currently free...commit and aborting is not
+#ifdef DELAYCOMP
+ c_ptrstack=&ptrstack;
+ c_primstack=&primstack;
+ c_branchstack=&branchstack;
+#endif
+}
+
+/* =======================================================
+ * transCreateObj
+ * Allocates a new object (header plus size bytes) through
+ * mygcmalloc, initializes its header and records it in the
+ * thread's newobjs list.
+ * Returns the address just past the object header.
+ * =======================================================
+ */
+objheader_t *transCreateObj(void * ptr, unsigned int size) {
+  objheader_t *header = mygcmalloc(ptr, (sizeof(objheader_t) + size));
+  objheader_t *payload = header + 1;
+
+  //fresh object: lock at its bias value, version 1, status NEW
+  header->lock = RW_LOCK_BIAS;
+  header->version = 1;
+  STATUS(header) = NEW;
+
+  //remember the object in newobjs; it is not inserted into the lookup table
+  if (newobjs->offset >= MAXOBJLIST) {
+    //current chunk is full: start a new one in front of it
+    struct objlist *fresh = malloc(sizeof(struct objlist));
+    fresh->next = newobjs;
+    fresh->objs[0] = payload;
+    fresh->offset = 1;
+    newobjs = fresh;
+  } else {
+    newobjs->objs[newobjs->offset++] = payload;
+  }
+  return payload; //want space after object header
+}
+
+/* Sleeps for a short pseudo-random time derived from the current
+ * microsecond clock value: twice (tv_usec modulo 2^softaborted), written to
+ * the tv_nsec field of the sleep request.
+ * Intended for use between commit retries. */
+void randomdelay(int softaborted) {
+  struct timeval now;
+  struct timespec pause;
+  long window;
+
+  gettimeofday(&now,NULL);
+  window = (long)((now.tv_usec)%(1<<softaborted));
+
+  pause.tv_sec = 0;
+  pause.tv_nsec = window<<1;
+  nanosleep(&pause, NULL);
+}
+
+/* ==============================================
+ * objstrAlloc
+ * - allocate space in an object store
+ * - size is rounded up to a multiple of 8
+ * - tries the first two stores of the thread's
+ *   cache list, then a large enough store from
+ *   the reserve list, then a freshly allocated
+ *   store (which becomes the new list front)
+ * - returns NULL (after printing an error) when
+ *   a new store cannot be allocated
+ * ==============================================
+ */
+void *objstrAlloc(unsigned int size) {
+  void *tmp;
+  int i=0;
+  objstr_t *store=t_cache;
+  if ((size&7)!=0) {
+    size+=(8-(size&7));
+  }
+
+  //only the first two stores are probed for free space
+  for(; i<2; i++) {
+    if (OSFREE(store)>=size) {
+      tmp=store->top;
+      store->top +=size;
+      return tmp;
+    }
+    if ((store=store->next)==NULL)
+      break;
+  }
+
+  {
+    unsigned int newsize=size>DEFAULT_OBJ_STORE_SIZE ? size : DEFAULT_OBJ_STORE_SIZE;
+    objstr_t **otmp=&t_reserve;
+    objstr_t *ptr;
+    while((ptr=*otmp)!=NULL) {
+      if (ptr->size>=newsize) {
+        //remove from list
+        *otmp=ptr->next;
+        ptr->next=t_cache;
+        t_cache=ptr;
+        ptr->top=((char *)(&ptr[1]))+size;
+        return &ptr[1];
+      }
+      //store too small: step to the next reserve entry (without this the
+      //scan would spin forever on the same entry)
+      otmp=&ptr->next;
+    }
+
+    objstr_t *os=(objstr_t *)calloc(1,(sizeof(objstr_t) + newsize));
+    if (os==NULL) {
+      printf("%s() Calloc error at line %d, %s\n", __func__, __LINE__, __FILE__);
+      return NULL;
+    }
+    void *nptr=&os[1];
+    os->next=t_cache;
+    t_cache=os;
+    os->size=newsize;
+    os->top=((char *)nptr)+size;
+    return nptr;
+  }
+}
+
+/* =============================================================
+ * transRead
+ * -locates the object header directly in front of oid
+ * -copies header and object into the transaction cache
+ * -registers the copy in the cache lookup table under oid
+ * -returns the address of the copy's data (just past its header)
+ * NOTE(review): the function is declared pure although it allocates
+ * and inserts into the lookup table -- confirm this is intentional.
+ * =============================================================
+ */
+__attribute__ ((pure)) void *transRead(void * oid, void *gl) {
+ objheader_t *tmp, *objheader;
+ objheader_t *objcopy;
+ int size;
+
+ /* Read from the main heap */
+ //No lock for now
+ objheader_t *header = (objheader_t *)(((char *)oid) - sizeof(objheader_t));
+ GETSIZE(size, header);
+ size += sizeof(objheader_t); // the copy includes the header
+ objcopy = (objheader_t *) objstrAlloc(size);
+#ifdef STMSTATS
+ header->accessCount++;
+ // objects flagged risky go through needLock before being copied
+ if(header->riskyflag) {
+ header=needLock(header,gl);
+ }
+#endif
+ A_memcpy(objcopy, header, size);
+ /* Insert into cache's lookup table */
+ STATUS(objcopy)=0;
+ // sanity check: oid is expected to lie between curr_heapbase and curr_heapptr
+ if (((unsigned INTPTR)oid)<((unsigned INTPTR ) curr_heapbase)|| ((unsigned INTPTR)oid) >((unsigned INTPTR) curr_heapptr))
+ printf("ERROR! Bad object address!\n");
+ t_chashInsert(oid, &objcopy[1]);
+ return &objcopy[1];
+}
+
+/* Free every chunk of the thread's newobjs list except the last one, and
+ * reset that remaining chunk to empty. */
+void freenewobjs() {
+  while (newobjs->next!=NULL) {
+    struct objlist *dead=newobjs;
+    newobjs=dead->next;
+    free(dead);
+  }
+  newobjs->offset=0;
+}
+
+#ifdef STMSTATS
+/* Free every chunk of the thread's lockedobjs list except the last one, and
+ * reset that remaining chunk to empty. */
+void freelockedobjs() {
+  while (lockedobjs->next!=NULL) {
+    struct objlist *dead=lockedobjs;
+    lockedobjs=dead->next;
+    free(dead);
+  }
+  lockedobjs->offset=0;
+}
+#endif
+
+/* ================================================================
+ * transCommit
+ * - This function initiates the transaction commit process
+ * - goes through the transaction cache and decides
+ * - a final response
+ * - returns 0 when the transaction committed and TRANS_ABORT when
+ *   it aborted (including a soft abort that is not retried)
+ * - every return path releases the per-transaction state: the
+ *   new-object list, the object stores and the cache lookup table
+ * - an unrecognized traversal result prints an error and exits
+ * ================================================================
+ */
+#ifdef DELAYCOMP
+int transCommit(void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params) {
+#else
+int transCommit() {
+#endif
+ int softaborted=0; // number of soft aborts seen so far in this call
+ do {
+ /* Look through all the objects in the transaction hash table */
+ int finalResponse;
+ // use the alternate traversal when the table holds fewer than size/8 elements
+#ifdef DELAYCOMP
+ if (c_numelements<(c_size>>3))
+ finalResponse= alttraverseCache(commitmethod, primitives, locals, params);
+ else
+ finalResponse= traverseCache(commitmethod, primitives, locals, params);
+#else
+ if (c_numelements<(c_size>>3))
+ finalResponse= alttraverseCache();
+ else
+ finalResponse= traverseCache();
+#endif
+ if(finalResponse == TRANS_ABORT) {
+#ifdef TRANSSTATS
+ numTransAbort++;
+ if (softaborted) {
+ nSoftAbortAbort++;
+ }
+#endif
+ freenewobjs();
+#ifdef STMSTATS
+ freelockedobjs();
+#endif
+ objstrReset();
+ t_chashreset();
+#ifdef DELAYCOMP
+ dc_t_chashreset();
+ ptrstack.count=0;
+ primstack.count=0;
+ branchstack.count=0;
+#endif
+ return TRANS_ABORT;
+ }
+ if(finalResponse == TRANS_COMMIT) {
+#ifdef TRANSSTATS
+ numTransCommit++;
+ if (softaborted) {
+ nSoftAbortCommit++;
+ }
+#endif
+ freenewobjs();
+#ifdef STMSTATS
+ freelockedobjs();
+#endif
+ objstrReset();
+ t_chashreset();
+#ifdef DELAYCOMP
+ dc_t_chashreset();
+ ptrstack.count=0;
+ primstack.count=0;
+ branchstack.count=0;
+#endif
+ return 0;
+ }
+ /* a soft abort is retried at most once, and only in SOFTABORT builds */
+ if(finalResponse == TRANS_SOFT_ABORT) {
+#ifdef TRANSSTATS
+ nSoftAbort++;
+#endif
+ softaborted++;
+ // with SOFTABORT the first soft abort loops and retries; without it every soft abort gives up
+#ifdef SOFTABORT
+ if (softaborted>1) {
+#else
+ if (1) {
+#endif
+ //give up and report an abort after too many soft aborts
+ freenewobjs();
+#ifdef STMSTATS
+ freelockedobjs();
+#endif
+ objstrReset();
+ t_chashreset();
+#ifdef DELAYCOMP
+ dc_t_chashreset();
+ ptrstack.count=0;
+ primstack.count=0;
+ branchstack.count=0;
+#endif
+ return TRANS_ABORT;
+ }
+ //randomdelay(softaborted);
+ } else {
+ printf("Error: in %s() Unknown outcome", __func__);
+ exit(-1);
+ }
+ } while (1);
+}
+
+/* freearrays: releases the lock-tracking arrays named oidrdlocked,
+ * oidrdversion and oidwrlocked in the expanding function, but only when the
+ * element counts reached 200 (the macro assumes they were heap-allocated in
+ * that case). It expands to bare if statements, so it must be used as a
+ * complete statement of its own. */
+#ifdef DELAYCOMP
+#define freearrays if (c_numelements>=200) { \
+ free(oidrdlocked); \
+ free(oidrdversion); \
+ } \
+ if (t_numelements>=200) { \
+ free(oidwrlocked); \
+ }
+#else
+#define freearrays if (c_numelements>=200) { \
+ free(oidrdlocked); \
+ free(oidrdversion); \
+ free(oidwrlocked); \
+ }
+#endif
+/* ==================================================
+ * traverseCache
+ * - goes through the transaction cache and
+ * - decides if a transaction should commit or abort
+ * ==================================================
+ */
+#ifdef DELAYCOMP
+int traverseCache(void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params) {
+#else
+int traverseCache() {
+#endif
+ /* Create info to keep track of objects that can be locked */
+ int numoidrdlocked=0;
+ int numoidwrlocked=0;
+ void * rdlocked[200];
+ int rdversion[200];
+ void * wrlocked[200];
+ int softabort=0;
+ int i;
+ void ** oidrdlocked;
+ void ** oidwrlocked;
+ int * oidrdversion;
+#ifdef DELAYCOMP
+ int t_numelements=c_numelements+dc_c_numelements;
+ if (t_numelements<200) {
+ oidwrlocked=wrlocked;
+ } else {
+ oidwrlocked=malloc(t_numelements*sizeof(void *));
+ }
+ if (c_numelements<200) {
+ oidrdlocked=rdlocked;
+ oidrdversion=rdversion;
+ } else {
+ int size=c_numelements*sizeof(void*);
+ oidrdlocked=malloc(size);
+ oidrdversion=malloc(size);
+ }
+#else
+ if (c_numelements<200) {
+ oidrdlocked=rdlocked;
+ oidrdversion=rdversion;
+ oidwrlocked=wrlocked;
+ } else {
+ int size=c_numelements*sizeof(void*);
+ oidrdlocked=malloc(size);
+ oidrdversion=malloc(size);
+ oidwrlocked=malloc(size);
+ }
+#endif
+ chashlistnode_t *ptr = c_table;
+ /* Represents number of bins in the chash table */
+ unsigned int size = c_size;
+ for(i = 0; i<size; i++) {
+ chashlistnode_t *curr = &ptr[i];
+ /* Inner loop to traverse the linked list of the cache lookupTable */
+ while(curr != NULL) {
+ //if the first bin in hash table is empty
+ if(curr->key == NULL)
+ break;
+ objheader_t * headeraddr=&((objheader_t *) curr->val)[-1];
+ objheader_t *header=(objheader_t *)(((char *)curr->key)-sizeof(objheader_t));
+ unsigned int version = headeraddr->version;
+
+ if(STATUS(headeraddr) & DIRTY) {
+ /* Read from the main heap and compare versions */
+ if(write_trylock(&header->lock)) { //can aquire write lock
+ if (version == header->version) { /* versions match */
+ /* Keep track of objects locked */
+ oidwrlocked[numoidwrlocked++] = header;
+ } else {
+ oidwrlocked[numoidwrlocked++] = header;
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+ getTotalAbortCount(i+1, size, (void *)(curr->next), numoidrdlocked, oidrdlocked, oidrdversion);
+#endif
+ DEBUGSTM("WR Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("WR Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+
+ }
+ } else {
+#ifdef DELAYCOMP
+ //TODO: check to see if we already have lock
+#endif
+ if(version == header->version) {
+ /* versions match */
+ softabort=1;
+ }
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+#if defined(STMSTATS)||defined(SOFTABORT)
+ if(getTotalAbortCount(i+1, size, (void *)(curr->next), numoidrdlocked, oidrdlocked, oidrdversion))
+ softabort=0;
+#endif
+ DEBUGSTM("WR Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("WR Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+
+ }
+ } else {
+ oidrdversion[numoidrdlocked]=version;
+ oidrdlocked[numoidrdlocked++] = header;
+ }
+ curr = curr->next;
+ }
+ } //end of for
+
+#ifdef DELAYCOMP
+ //acquire other locks
+ unsigned int numoidwrtotal=numoidwrlocked;
+
+ chashlistnode_t *dc_curr = dc_c_list;
+ /* Inner loop to traverse the linked list of the cache lookupTable */
+ while(likely(dc_curr != NULL)) {
+ //if the first bin in hash table is empty
+ objheader_t * headeraddr=&((objheader_t *) dc_curr->val)[-1];
+ objheader_t *header=(objheader_t *)(((char *)dc_curr->key)-sizeof(objheader_t));
+ if(write_trylock(&header->lock)) { //can aquire write lock
+ oidwrlocked[numoidwrtotal++] = header;
+ } else {
+ //maybe we already have lock
+ void * key=dc_curr->key;
+ chashlistnode_t *node = &c_table[(((unsigned INTPTR)key) & c_mask)>>4];
+
+ do {
+ if(node->key == key) {
+ objheader_t * headeraddr=&((objheader_t *) node->val)[-1];
+ if(STATUS(headeraddr) & DIRTY) {
+ goto nextloop;
+ } else
+ break;
+ }
+ node = node->next;
+ } while(node != NULL);
+
+ //have to abort to avoid deadlock
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+#if defined(STMSTATS)||defined(SOFTABORT)
+ if(getTotalAbortCount(i+1, size, (void *)(curr->next), numoidrdlocked, oidrdlocked, oidrdversion))
+ softabort=0;
+#endif
+ DEBUGSTM("WR Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("WR Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+ }
+ nextloop:
+ dc_curr = dc_curr->lnext;
+ }
+#endif
+
+ //THIS IS THE SERIALIZATION END POINT (START POINT IS END OF EXECUTION)*****
+
+ for(i=0; i<numoidrdlocked; i++) {
+ /* Read from the main heap and compare versions */
+ objheader_t *header=oidrdlocked[i];
+ unsigned int version=oidrdversion[i];
+ if(header->lock>0) { //not write locked
+ if(version != header->version) { /* versions do not match */
+#ifdef DELAYCOMP
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#else
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#endif
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+ getReadAbortCount(i+1, numoidrdlocked, oidrdlocked, oidrdversion);
+#endif
+ DEBUGSTM("RD Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("RD Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ return TRANS_ABORT;
+ }
+#if DELAYCOMP
+ } else if (dc_t_chashSearch(((char *)header)+sizeof(objheader_t))!=NULL) {
+ //couldn't get lock because we already have it
+ //check if it is the right version number
+ if (version!=header->version) {
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+ getReadAbortCount(i+1, numoidrdlocked, oidrdlocked, oidrdversion);
+#endif
+ DEBUGSTM("RD Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("RD Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ return TRANS_ABORT;
+ }
+#endif
+ } else { /* cannot aquire lock */
+ //do increment as we didn't get lock
+ if(version == header->version) {
+ softabort=1;
+ }
+#ifdef DELAYCOMP
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#else
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#endif
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+#if defined(STMSTATS)||defined(SOFTABORT)
+ if(getReadAbortCount(i+1, numoidrdlocked, oidrdlocked, oidrdversion))
+ softabort=0;
+#endif
+ DEBUGSTM("RD Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("RD Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+
+ }
+ }
+
+ /* Decide the final response */
+#ifdef DELAYCOMP
+ transCommitProcess(oidwrlocked, numoidwrlocked, numoidwrtotal, commitmethod, primitives, locals, params);
+#else
+ transCommitProcess(oidwrlocked, numoidwrlocked);
+#endif
+ DEBUGSTM("Commit: rd: %u wr: %u tot: %u\n", numoidrdlocked, numoidwrlocked, c_numelements);
+ freearrays;
+ return TRANS_COMMIT;
+}
+
+/* ==================================================
+ * alttraverseCache
+ * - goes through the transaction cache and
+ * - decides if a transaction should commit or abort
+ *
+ * Walks the c_list chain (following lnext) rather than the bucket array.
+ * Phase 1: every DIRTY cached object is write-locked in the main heap and
+ * its cached version is compared with the main-heap version; clean
+ * objects are only remembered together with the version that was read.
+ * Phase 2 (DELAYCOMP only): the objects in dc_c_list are write-locked.
+ * Phase 3: the versions of all remembered read objects are re-checked.
+ * If everything validates, transCommitProcess() is called.
+ *
+ * Returns TRANS_COMMIT, TRANS_ABORT or TRANS_SOFT_ABORT. Each exit path
+ * releases the locks taken so far (abort paths via transAbortProcess)
+ * and invokes the freearrays macro, which is defined elsewhere.
+ * ==================================================
+ */
+
+#ifdef DELAYCOMP
+int alttraverseCache(void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params) {
+#else
+int alttraverseCache() {
+#endif
+ /* Create info to keep track of objects that can be locked */
+ int numoidrdlocked=0;
+ int numoidwrlocked=0;
+ //small transactions use these stack arrays; larger ones use malloc below
+ void * rdlocked[200];
+ int rdversion[200];
+ void * wrlocked[200];
+ int softabort=0;
+ int i;
+ void ** oidrdlocked;
+ int * oidrdversion;
+ void ** oidwrlocked;
+#ifdef DELAYCOMP
+ //the write array must also hold the delayed-computation objects
+ int t_numelements=c_numelements+dc_c_numelements;
+ if (t_numelements<200) {
+ oidwrlocked=wrlocked;
+ } else {
+ oidwrlocked=malloc(t_numelements*sizeof(void *));
+ }
+ if (c_numelements<200) {
+ oidrdlocked=rdlocked;
+ oidrdversion=rdversion;
+ } else {
+ int size=c_numelements*sizeof(void*);
+ oidrdlocked=malloc(size);
+ oidrdversion=malloc(size);
+ }
+#else
+ if (c_numelements<200) {
+ oidrdlocked=rdlocked;
+ oidrdversion=rdversion;
+ oidwrlocked=wrlocked;
+ } else {
+ int size=c_numelements*sizeof(void*);
+ oidrdlocked=malloc(size);
+ oidrdversion=malloc(size);
+ oidwrlocked=malloc(size);
+ }
+#endif
+ chashlistnode_t *curr = c_list;
+ /* Inner loop to traverse the linked list of the cache lookupTable */
+ while(likely(curr != NULL)) {
+ //headeraddr is the header of the cached copy, header the one in the main heap
+ objheader_t * headeraddr=&((objheader_t *) curr->val)[-1];
+ objheader_t *header=(objheader_t *)(((char *)curr->key)-sizeof(objheader_t));
+ unsigned int version = headeraddr->version;
+
+ if(STATUS(headeraddr) & DIRTY) {
+ /* Read from the main heap and compare versions */
+ if(likely(write_trylock(&header->lock))) { //can aquire write lock
+ if (likely(version == header->version)) { /* versions match */
+ /* Keep track of objects locked */
+ oidwrlocked[numoidwrlocked++] = header;
+ } else {
+ //record the lock first so that transAbortProcess releases it too
+ oidwrlocked[numoidwrlocked++] = header;
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+ //NOTE(review): this loop advances with lnext but hands the bucket link
+ //(next) to getTotalAbortCount2 -- confirm which chain is intended
+ getTotalAbortCount2((void *) curr->next, numoidrdlocked, oidrdlocked, oidrdversion);
+#endif
+ DEBUGSTM("WR Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("WR Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ return TRANS_ABORT;
+ }
+ } else { /* cannot aquire lock */
+ if(version == header->version) {
+ /* versions match */
+ softabort=1;
+ }
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+#if defined(STMSTATS)||defined(SOFTABORT)
+ //a stale version anywhere else turns the soft abort into a hard one
+ if(getTotalAbortCount2((void *) curr->next, numoidrdlocked, oidrdlocked, oidrdversion))
+ softabort=0;
+#endif
+ DEBUGSTM("WR Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("WR Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+ }
+ } else {
+ /* Not dirty: remember the object and the version that was read */
+ oidrdversion[numoidrdlocked]=version;
+ oidrdlocked[numoidrdlocked++] = header;
+ }
+ curr = curr->lnext;
+ }
+
+#ifdef DELAYCOMP
+ //acquire other locks
+ unsigned int numoidwrtotal=numoidwrlocked;
+ chashlistnode_t *dc_curr = dc_c_list;
+ /* Inner loop to traverse the linked list of the cache lookupTable */
+ while(likely(dc_curr != NULL)) {
+ //if the first bin in hash table is empty
+ objheader_t * headeraddr=&((objheader_t *) dc_curr->val)[-1];
+ objheader_t *header=(objheader_t *)(((char *)dc_curr->key)-sizeof(objheader_t));
+ if(write_trylock(&header->lock)) { //can aquire write lock
+ oidwrlocked[numoidwrtotal++] = header;
+ } else {
+ //maybe we already have lock
+ void * key=dc_curr->key;
+ chashlistnode_t *node = &c_table[(((unsigned INTPTR)key) & c_mask)>>4];
+
+ do {
+ if(node->key == key) {
+ objheader_t * headeraddr=&((objheader_t *) node->val)[-1];
+ if(STATUS(headeraddr) & DIRTY) {
+ goto nextloop;
+ }
+ }
+ node = node->next;
+ } while(node != NULL);
+
+ //have to abort to avoid deadlock
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+#if defined(STMSTATS)||defined(SOFTABORT)
+ //c_list has been walked to its end here, so curr is NULL and must not
+ //be dereferenced; no cache chain is left, only the read set is checked
+ if(getTotalAbortCount2(NULL, numoidrdlocked, oidrdlocked, oidrdversion))
+ softabort=0;
+#endif
+ DEBUGSTM("WR Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("WR Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+ }
+ nextloop:
+ dc_curr = dc_curr->lnext;
+ }
+#endif
+
+ //THIS IS THE SERIALIZATION END POINT (START POINT IS END OF EXECUTION)*****
+
+ for(i=0; i<numoidrdlocked; i++) {
+ objheader_t * header=oidrdlocked[i];
+ unsigned int version=oidrdversion[i];
+ //NOTE(review): traverseCache tests header->lock>0 at this point; if the
+ //lock word is unsigned, >=0 is always true and the else branch below is
+ //never taken -- confirm which comparison is intended
+ if(header->lock>=0) {
+ if(version != header->version) {
+#ifdef DELAYCOMP
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#else
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#endif
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+ getReadAbortCount(i+1, numoidrdlocked, oidrdlocked, oidrdversion);
+#endif
+ DEBUGSTM("RD Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("RD Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ return TRANS_ABORT;
+ }
+#ifdef DELAYCOMP
+ //TODO: check to see if we already have lock
+#endif
+ } else { /* cannot aquire lock */
+ if(version == header->version) {
+ softabort=1;
+ }
+#ifdef DELAYCOMP
+ transAbortProcess(oidwrlocked, numoidwrtotal);
+#else
+ transAbortProcess(oidwrlocked, numoidwrlocked);
+#endif
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+#if defined(STMSTATS)||defined(SOFTABORT)
+ if(getReadAbortCount(i+1, numoidrdlocked, oidrdlocked, oidrdversion))
+ softabort=0;
+#endif
+ DEBUGSTM("RD Abort: rd: %u wr: %u tot: %u type: %u ver: %u\n", numoidrdlocked, numoidwrlocked, c_numelements, TYPE(header), header->version);
+ DEBUGSTMSTAT("RD Abort: Access Count: %u AbortCount: %u type: %u ver: %u \n", header->accessCount, header->abortCount, TYPE(header), header->version);
+ freearrays;
+ if (softabort)
+ return TRANS_SOFT_ABORT;
+ else
+ return TRANS_ABORT;
+ }
+ }
+
+ /* Decide the final response */
+#ifdef DELAYCOMP
+ transCommitProcess(oidwrlocked, numoidwrlocked, numoidwrtotal, commitmethod, primitives, locals, params);
+#else
+ transCommitProcess(oidwrlocked, numoidwrlocked);
+#endif
+ DEBUGSTM("Commit: rd: %u wr: %u tot: %u\n", numoidrdlocked, numoidwrlocked, c_numelements);
+ freearrays;
+ return TRANS_COMMIT;
+}
+
+/* ==================================
+ * transAbortProcess
+ *
+ * Undoes the locking done by a failed commit attempt: releases the
+ * first numoidwrlocked write locks recorded in oidwrlocked, last
+ * acquired first. With STMSTATS it also clears trec and unlocks the
+ * objlock mutex of every object recorded in lockedobjs.
+ * =================================
+ */
+void transAbortProcess(void **oidwrlocked, int numoidwrlocked) {
+ int idx=numoidwrlocked;
+
+ /* Release write locks, newest first */
+ while(--idx>=0) {
+ objheader_t *locked=(objheader_t *)oidwrlocked[idx];
+ write_unlock(&locked->lock);
+ }
+
+#ifdef STMSTATS
+ /* clear trec and then release objects locked */
+ struct objlist *chunk;
+ for(chunk=lockedobjs; chunk!=NULL; chunk=chunk->next) {
+ for(idx=chunk->offset-1; idx>=0; idx--) {
+ objheader_t *held=(objheader_t *)chunk->objs[idx];
+ held->trec = NULL;
+ pthread_mutex_unlock(held->objlock);
+ }
+ }
+#endif
+}
+
+/* ==================================
+ * transCommitProcess
+ *
+ * Publishes a validated transaction. In order:
+ * 1. clears ___objstatus___ on every object recorded in newobjs
+ * 2. copies the cached copy of each of the first numoidwrlocked
+ * objects in oidwrlocked back over the main-heap object
+ * 3. DELAYCOMP only: resets the ptr/prim/branch stacks and calls
+ * commitmethod(params, locals, primitives)
+ * 4. releases the write locks (numoidwrtotal of them with DELAYCOMP,
+ * numoidwrlocked otherwise); with DELAYCOMP the version is bumped
+ * here, otherwise it is bumped during the copy in step 2
+ * 5. STMSTATS only: clears trec and unlocks objlock for every object
+ * recorded in lockedobjs
+ * =================================
+ */
+#ifdef DELAYCOMP
+ void transCommitProcess(void ** oidwrlocked, int numoidwrlocked, int numoidwrtotal, void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params) {
+#else
+ void transCommitProcess(void ** oidwrlocked, int numoidwrlocked) {
+#endif
+ objheader_t *header;
+ void *ptrcreate; // not referenced anywhere in this function
+ int i;
+ struct objlist *ptr=newobjs;
+ while(ptr!=NULL) {
+ int max=ptr->offset; // number of used slots in this chunk
+ for(i=0; i<max; i++) {
+ //clear the new flag
+ ((struct ___Object___ *)ptr->objs[i])->___objstatus___=0;
+ }
+ ptr=ptr->next;
+ }
+
+ /* Copy from transaction cache -> main object store */
+ for (i = numoidwrlocked-1; i >=0; i--) {
+ /* Read from the main heap */
+ header = (objheader_t *)oidwrlocked[i];
+ int tmpsize;
+ GETSIZE(tmpsize, header);
+ struct ___Object___ *dst=(struct ___Object___*)(((char *)oidwrlocked[i])+sizeof(objheader_t)); // object body follows its header
+ struct ___Object___ *src=t_chashSearch(dst); // cached copy, looked up by main-heap address; result is not checked for NULL
+ dst->___cachedCode___=src->___cachedCode___;
+ dst->___cachedHash___=src->___cachedHash___;
+ A_memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___)); // everything after the struct ___Object___ prefix
+ __asm__ __volatile__("": : :"memory"); // compiler barrier: keep the copy ahead of the version bump
+#ifndef DELAYCOMP
+ header->version++;
+#endif
+ }
+ __asm__ __volatile__("": : :"memory");
+
+#ifdef DELAYCOMP
+ // call commit method
+ ptrstack.count=0;
+ primstack.count=0;
+ branchstack.count=0;
+ commitmethod(params, locals, primitives);
+#endif
+
+ /* Release write locks */
+#ifdef DELAYCOMP
+ for(i=numoidwrtotal-1; i>=0; i--) {
+#else
+ for(i=numoidwrlocked-1; i>=0; i--) {
+#endif
+ header = (objheader_t *)oidwrlocked[i];
+#ifdef DELAYCOMP
+ header->version++;
+#endif
+ write_unlock(&header->lock);
+ }
+
+#ifdef STMSTATS
+ /* clear trec and then release objects locked */
+ ptr=lockedobjs;
+ while(ptr!=NULL) {
+ int max=ptr->offset;
+ for(i=max-1; i>=0; i--) {
+ header = (objheader_t *)ptr->objs[i];
+ header->trec = NULL;
+ pthread_mutex_unlock(header->objlock);
+ }
+ ptr=ptr->next;
+ }
+#endif
+}
+
+#if defined(STMSTATS)||defined(SOFTABORT)
+/* Walks one bucket chain starting at curr and compares the version stored
+ * in each cached copy with the version in the matching main-heap header.
+ * Stops at the end of the chain or at a slot whose key is NULL.
+ * Every mismatch is counted (STMSTATS); returns 1 if any was found, else 0.
+ */
+static int chainVersionsChanged(chashlistnode_t *curr) {
+ int changed=0;
+ while(curr != NULL) {
+ if(curr->key == NULL)
+ break;
+ objheader_t * headeraddr=&((objheader_t *) curr->val)[-1];
+ objheader_t *header=(objheader_t *)(((char *)curr->key)-sizeof(objheader_t));
+ unsigned int version = headeraddr->version;
+ /* versions do not match */
+ if(version != header->version) {
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+ changed=1;
+ }
+ curr = curr->next;
+ }
+ return changed;
+}
+
+/** ========================================================================================
+ * getTotalAbortCount (for traverseCache only)
+ * params : start: index of the first bucket of c_table that has not been visited yet
+ * : stop: one past the last bucket index to visit
+ * : startptr: rest of the chain the caller was traversing (may be NULL)
+ * : numoidrdlocked : number of objects read that are locked
+ * : oidrdlocked : array of objects read and currently locked
+ * : oidrdversion : array of versions of object read
+ * returns: 1 if any examined object has a stale version (hard abort), 0 otherwise
+ *
+ * The previous implementation overwrote startptr on the first iteration and
+ * never loaded a later bucket, so only bucket `start` was examined.
+ * =========================================================================================
+ **/
+int getTotalAbortCount(int start, int stop, void *startptr, int numoidrdlocked, void *oidrdlocked, int *oidrdversion) {
+ int i;
+ int hardabort=0;
+ chashlistnode_t *ptr = c_table;
+ /* First finish the chain the caller stopped in ... */
+ if(chainVersionsChanged((chashlistnode_t *) startptr))
+ hardabort=1;
+ /* ... then go through all buckets that have not been covered yet */
+ for(i = start; i < stop; i++) {
+ if(chainVersionsChanged(&ptr[i]))
+ hardabort=1;
+ }
+
+ /* Then go through all objects that are read and are currently present in the readLockedArray */
+ for(i=0; i<numoidrdlocked; i++) {
+ objheader_t *header = ((void **)oidrdlocked)[i];
+ unsigned int version = oidrdversion[i];
+ if(version != header->version) { /* versions do not match */
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+ hardabort=1;
+ }
+ }
+
+ return hardabort;
+}
+
+/** ========================================================================================
+ * getTotalAbortCount2 (for alttraverseCache only)
+ * params : startptr: first cache node to examine; the walk follows ->next until NULL
+ * : numoidrdlocked : number of entries in the two read arrays
+ * : oidrdlocked : array of main-heap headers of the objects read
+ * : oidrdversion : version recorded for each object when it was read
+ * returns: 1 if any examined object has a version different from the main heap, else 0
+ * =========================================================================================
+ **/
+int getTotalAbortCount2(void *startptr, int numoidrdlocked, void *oidrdlocked, int *oidrdversion) {
+ int stale=0;
+ int idx;
+ void **readobjs=(void **)oidrdlocked;
+ chashlistnode_t *node;
+
+ /* Cached copies reachable from startptr: cached version vs. main heap */
+ for(node=(chashlistnode_t *) startptr; node != NULL; node=node->next) {
+ objheader_t *cachedhdr=&((objheader_t *) node->val)[-1];
+ objheader_t *header=(objheader_t *)(((char *)node->key)-sizeof(objheader_t));
+ unsigned int seen = cachedhdr->version;
+ if(seen == header->version)
+ continue;
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+ stale=1;
+ }
+
+ /* Objects in the read array: recorded version vs. main heap */
+ for(idx=0; idx<numoidrdlocked; idx++) {
+ objheader_t *header = readobjs[idx];
+ unsigned int seen = oidrdversion[idx];
+ if(seen == header->version)
+ continue;
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+ stale=1;
+ }
+
+ return stale;
+}
+
+/**
+ * getReadAbortCount : checks entries [start, stop) of the read array for objects
+ * whose main-heap version differs from the version recorded at read time
+ * params: int start, int stop are indexes to readLocked array
+ * void *oidrdlocked = readLocked array
+ * int *oidrdversion = version array
+ * returns: 1 if at least one mismatch was found, 0 otherwise (a flag, not a count)
+ **/
+int getReadAbortCount(int start, int stop, void *oidrdlocked, int *oidrdversion) {
+ void **readobjs=(void **)oidrdlocked;
+ int stale=0;
+ int idx=start;
+ /* Go through oids read that are locked */
+ while(idx < stop) {
+ objheader_t *header = readobjs[idx];
+ unsigned int seen = oidrdversion[idx];
+ idx++;
+ if(seen == header->version)
+ continue;
+ /* versions do not match */
+#ifdef STMSTATS
+ ABORTCOUNT(header);
+ (typesCausingAbort[TYPE(header)])++;
+#endif
+ stale=1;
+ }
+ return stale;
+}
+
+/**
+ * needLock
+ * params: Object header, ptr to garbage collector
+ * Locks an object that causes aborts
+ *
+ * Spins on pthread_mutex_trylock(header->objlock) until either the mutex is
+ * obtained or the object shows an owner in header->trec. If an owner exists
+ * and is itself marked blocked, the lock is skipped. Otherwise the thread
+ * waits on the mutex. An object that was locked is recorded in lockedobjs.
+ *
+ * returns: the object header; under PRECISE_GC it is re-read after the wait
+ * (from ptrarray[2]). When the lock is skipped the header is
+ * returned unchanged and the object is NOT recorded in lockedobjs.
+ **/
+objheader_t * needLock(objheader_t *header, void *gl) {
+ int lockstatus;
+ threadrec_t *ptr=NULL;
+ while((lockstatus = pthread_mutex_trylock(header->objlock))
+ && ((ptr = header->trec) == NULL)) { //retry
+ ;
+ }
+ if(lockstatus==0) { //acquired lock
+ /* Set trec */
+ header->trec = trec;
+ } else { //failed to get lock
+ trec->blocked=1;
+ //memory barrier
+ __asm__ __volatile__("":::"memory");
+ //see if other thread is blocked
+ if(ptr->blocked == 1) {
+ //it might be block, so ignore lock and clear our blocked flag
+ trec->blocked=0;
+ //the function returns a pointer: hand back the header instead of an
+ //indeterminate value
+ return header;
+ } else {
+#ifdef PRECISE_GC
+ INTPTR ptrarray[]={1, (INTPTR)gl, (INTPTR) header};
+ void *lockptr=header->objlock;
+ stopforgc((struct garbagelist *)ptrarray);
+ //grab lock and wait our turn
+ pthread_mutex_lock(lockptr);
+ restartaftergc();
+ header=(objheader_t *) ptrarray[2];
+#else
+ //objlock is the mutex used by the trylock above
+ pthread_mutex_lock(header->objlock);
+#endif
+ /* we have lock, so we are not blocked anymore */
+ trec->blocked = 0;
+ /* Set our trec */
+ header->trec = trec;
+ }
+ }
+ //trec->blocked is zero now
+
+ /* Save the locked object */
+ if (lockedobjs->offset<MAXOBJLIST) {
+ lockedobjs->objs[lockedobjs->offset++]=header;
+ } else {
+ //current chunk is full: push a new chunk on the front of the list
+ struct objlist *tmp=malloc(sizeof(struct objlist));
+ tmp->next=lockedobjs;
+ tmp->objs[0]=header;
+ tmp->offset=1;
+ lockedobjs=tmp;
+ }
+ return header;
+}
+
+#endif
--- /dev/null
+#include "sockpool.h"
+#include <netinet/tcp.h>
+
+#ifdef RECOVERY
+#define TIMEOUT_TIME 3
+#endif
+
+#if defined(__i386__)
+/* Atomically stores 1 into *addr and returns the value that was there
+ * before; a return of 0 means the caller changed it from 0 to 1. */
+inline int test_and_set(volatile unsigned int *addr) {
+ int oldval;
+ /* Note: the "xchg" instruction does not need a "lock" prefix */
+ __asm__ __volatile__ ("xchgl %0, %1"
+ : "=r" (oldval), "=m" (*(addr))
+ : "0" (1), "m" (*(addr)));
+ return oldval;
+}
+/* Atomically stores 0 into *addr; the previous value is discarded. */
+inline void UnLock(volatile unsigned int *addr) {
+ int oldval;
+ /* Note: the "xchg" instruction does not need a "lock" prefix */
+ __asm__ __volatile__ ("xchgl %0, %1"
+ : "=r" (oldval), "=m" (*(addr))
+ : "0" (0), "m" (*(addr)));
+}
+#else
+# error need implementation of test_and_set
+#endif
+
+#define MAXSPINS 4
+
+/* Acquires the spin lock at s. While the lock is held by someone else the
+ * word is polled with plain reads, yielding the CPU each time more than
+ * MAXSPINS polls have gone by, before test_and_set is tried again. */
+inline void Lock(volatile unsigned int *s) {
+ int spins;
+ while(test_and_set(s)!=0) {
+ for(spins=0; *s!=0; ) {
+ if (spins++>MAXSPINS) {
+ sched_yield();
+ spins=0;
+ }
+ }
+ }
+}
+
+/* Allocates and initialises a socket pool with `size` buckets.
+ * sockhash: ignored on entry; it is overwritten with the new allocation
+ * size: number of buckets; the bucket mask is size-1, so this only
+ * selects buckets correctly when size is a power of two
+ * returns: the new pool, or NULL if either allocation fails
+ */
+sockPoolHashTable_t *createSockPool(sockPoolHashTable_t * sockhash, unsigned int size) {
+ if((sockhash = calloc(1, sizeof(sockPoolHashTable_t))) == NULL) {
+ printf("Calloc error at %s line %d\n", __FILE__, __LINE__);
+ return NULL;
+ }
+
+ socknode_t **nodelist;
+ //calloc reports failure with NULL; a "< 0" test on the pointer never fires
+ if ((nodelist = calloc(size, sizeof(socknode_t *))) == NULL) {
+ printf("Calloc error at %s line %d\n", __FILE__, __LINE__);
+ free(sockhash);
+ return NULL;
+ }
+
+ sockhash->table = nodelist;
+ sockhash->inuse = NULL;
+ sockhash->size = size;
+ sockhash->mask = size - 1;
+ sockhash->mylock=0;
+
+ return sockhash;
+}
+
+/* Opens a TCP connection to machine `mid` on LISTEN_PORT.
+ * mid: IPv4 address of the peer in host byte order (passed through htonl)
+ * With RECOVERY, send and receive timeouts of TIMEOUT_TIME seconds are set.
+ * TCP_NODELAY is always requested. setsockopt results are not checked.
+ * returns: the connected descriptor, or -1 if socket() or connect() fails
+ */
+int createNewSocket(unsigned int mid) {
+ int sd;
+ int flag=1;
+ if((sd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ printf("%s() Error: In creating socket at %s, %d\n", __func__, __FILE__, __LINE__);
+ return -1;
+ }
+#ifdef RECOVERY
+#ifdef DEBUG
+ printf("%s-> Setting timeouts for sd:%d\n", __func__, sd);
+#endif
+ struct timeval tv;
+ tv.tv_sec = TIMEOUT_TIME;
+ tv.tv_usec = 0;
+ setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO, (struct timeval *)&tv, sizeof(tv));
+ setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (struct timeval *)&tv, sizeof(tv));
+#endif
+ setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(flag));
+ struct sockaddr_in remoteAddr;
+ bzero(&remoteAddr, sizeof(remoteAddr));
+ remoteAddr.sin_family = AF_INET;
+ remoteAddr.sin_port = htons(LISTEN_PORT);
+ remoteAddr.sin_addr.s_addr = htonl(mid);
+ if(connect(sd, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
+ //perror may itself modify errno, so keep connect's value for the message
+ int connecterr = errno;
+ perror("socket connect: ");
+ printf("%s(): Error %d connecting to %s:%d\n", __func__, connecterr, inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
+ close(sd);
+ return -1;
+ }
+ return sd;
+}
+
+/* Checks a socket to machine `mid` out of the pool, under the pool lock.
+ * An idle node for `mid` is unlinked from its bucket and pushed on the
+ * in-use list. If none exists a new connection is opened (outside the
+ * lock) and a fresh node is pushed on the in-use list.
+ * returns: the socket descriptor, or -1 if no connection could be made or
+ * the node could not be allocated
+ */
+int getSockWithLock(sockPoolHashTable_t *sockhash, unsigned int mid) {
+ socknode_t **ptr;
+ int key = mid&(sockhash->mask);
+ int sd;
+
+ Lock(&sockhash->mylock);
+ ptr=&(sockhash->table[key]);
+
+ while(*ptr!=NULL) {
+ if (mid == (*ptr)->mid) {
+ socknode_t *tmp=*ptr;
+ sd = tmp->sd;
+ *ptr=tmp->next;
+ tmp->next=sockhash->inuse;
+ sockhash->inuse=tmp;
+ UnLock(&sockhash->mylock);
+ return sd;
+ }
+ ptr=&((*ptr)->next);
+ }
+ UnLock(&sockhash->mylock);
+ if((sd = createNewSocket(mid)) != -1) {
+ socknode_t *inusenode = calloc(1, sizeof(socknode_t));
+ if (inusenode == NULL) {
+ //without a node the socket cannot be tracked; do not leak it
+ printf("Calloc error at %s line %d\n", __FILE__, __LINE__);
+ close(sd);
+ return -1;
+ }
+ inusenode->sd = sd;
+ inusenode->mid = mid;
+ insToListWithLock(sockhash, inusenode);
+ printf("returning sd:%d\n", sd);
+ return sd;
+ } else {
+ return -1;
+ }
+}
+
+/* Unlocked variant of getSockWithLock: checks a socket to machine `mid`
+ * out of the pool. An idle node for `mid` is moved from its bucket to the
+ * in-use list; otherwise a new connection is opened and a fresh node is
+ * pushed on the in-use list.
+ * returns: the socket descriptor, or -1 if no connection could be made or
+ * the node could not be allocated
+ */
+int getSock(sockPoolHashTable_t *sockhash, unsigned int mid) {
+ socknode_t **ptr;
+ int key = mid&(sockhash->mask);
+ int sd;
+
+ ptr=&(sockhash->table[key]);
+
+ while(*ptr!=NULL) {
+ if (mid == (*ptr)->mid) {
+ socknode_t *tmp=*ptr;
+ sd = tmp->sd;
+ *ptr=tmp->next;
+ tmp->next=sockhash->inuse;
+ sockhash->inuse=tmp;
+ return sd;
+ }
+ ptr=&((*ptr)->next);
+ }
+ if((sd = createNewSocket(mid)) != -1) {
+ socknode_t *inusenode = calloc(1, sizeof(socknode_t));
+ if (inusenode == NULL) {
+ //without a node the socket cannot be tracked; do not leak it
+ printf("Calloc error at %s line %d\n", __FILE__, __LINE__);
+ close(sd);
+ return -1;
+ }
+ //record what the node stands for, as getSockWithLock does
+ inusenode->sd = sd;
+ inusenode->mid = mid;
+ inusenode->next=sockhash->inuse;
+ sockhash->inuse=inusenode;
+ return sd;
+ } else {
+ return -1;
+ }
+}
+
+/* Returns a socket to machine `mid` without checking it out: the node
+ * stays in its bucket. If no node exists a new connection is opened and
+ * appended to the end of the bucket chain.
+ * returns: the socket descriptor, or -1 if no connection could be made or
+ * the node could not be allocated
+ */
+int getSock2(sockPoolHashTable_t *sockhash, unsigned int mid) {
+ socknode_t **ptr;
+ int key = mid&(sockhash->mask);
+ int sd;
+
+ ptr=&(sockhash->table[key]);
+
+ while(*ptr!=NULL) {
+ if (mid == (*ptr)->mid) {
+ return (*ptr)->sd;
+ }
+ ptr=&((*ptr)->next);
+ }
+ if((sd = createNewSocket(mid)) != -1) {
+ socknode_t *node=calloc(1, sizeof(socknode_t));
+ if (node == NULL) {
+ //leave the chain untouched and do not leak the socket
+ printf("Calloc error at %s line %d\n", __FILE__, __LINE__);
+ close(sd);
+ return -1;
+ }
+ node->mid=mid;
+ node->sd=sd;
+ //ptr addresses the NULL link at the end of the chain
+ *ptr=node;
+ return sd;
+ } else {
+ return -1;
+ }
+}
+
+/*socket pool with multiple TR threads asking to connect to same machine */
+/* Locked variant of getSock2: returns a socket to machine `mid` and leaves
+ * the node in its bucket. If no node exists a new connection is opened
+ * outside the lock and added with addSockWithLock.
+ * returns: the socket descriptor, or -1 if no connection could be made or
+ * the node could not be allocated
+ */
+int getSock2WithLock(sockPoolHashTable_t *sockhash, unsigned int mid) {
+ socknode_t **ptr;
+ int key = mid&(sockhash->mask);
+ int sd;
+
+ Lock(&sockhash->mylock);
+ ptr=&(sockhash->table[key]);
+ while(*ptr!=NULL) {
+ if (mid == (*ptr)->mid) {
+ //read the descriptor while the lock still protects the chain
+ sd = (*ptr)->sd;
+ UnLock(&sockhash->mylock);
+ printf("RETURNING SD\n");
+ return sd;
+ }
+ ptr=&((*ptr)->next);
+ }
+ UnLock(&sockhash->mylock);
+ if((sd = createNewSocket(mid)) != -1) {
+ socknode_t *inusenode = calloc(1, sizeof(socknode_t));
+ if (inusenode == NULL) {
+ //without a node the socket cannot be tracked; do not leak it
+ printf("Calloc error at %s line %d\n", __FILE__, __LINE__);
+ close(sd);
+ return -1;
+ }
+ inusenode->sd = sd;
+ inusenode->mid = mid;
+ addSockWithLock(sockhash, inusenode);
+ printf("RETURNING NEW SD\n");
+ return sd;
+ } else {
+ return -1;
+ }
+}
+
+/* Pushes node ptr on the front of the bucket selected by ptr->mid,
+ * holding the pool lock for the duration of the update. */
+void addSockWithLock(sockPoolHashTable_t *sockhash, socknode_t *ptr) {
+ int slot = ptr->mid&(sockhash->mask);
+ volatile unsigned int *guard = &sockhash->mylock;
+
+ Lock(guard);
+ ptr->next = sockhash->table[slot];
+ sockhash->table[slot] = ptr;
+ UnLock(guard);
+}
+
+/* Pushes inusenode on the front of the pool's in-use list, holding the
+ * pool lock for the duration of the update. */
+void insToListWithLock(sockPoolHashTable_t *sockhash, socknode_t *inusenode) {
+ volatile unsigned int *guard = &sockhash->mylock;
+
+ Lock(guard);
+ inusenode->next = sockhash->inuse;
+ sockhash->inuse = inusenode;
+ UnLock(guard);
+}
+
+/* Returns socket `sd` for machine `mid` to the pool (no locking).
+ * The first node of the in-use list is reused: it is relabelled with
+ * mid/sd and pushed on the front of the bucket for `mid`.
+ * If the in-use list is empty there is no node to reuse and the call
+ * does nothing.
+ */
+void freeSock(sockPoolHashTable_t *sockhash, unsigned int mid, int sd) {
+ int key = mid&(sockhash->mask);
+ socknode_t *ptr = sockhash->inuse;
+ if (ptr == NULL) //nothing checked out: avoid dereferencing NULL
+ return;
+ sockhash->inuse = ptr->next;
+ ptr->mid = mid;
+ ptr->sd = sd;
+ ptr->next = sockhash->table[key];
+ sockhash->table[key] = ptr;
+}
+
+/* Returns socket `sd` for machine `mid` to the pool under the pool lock.
+ * The first node of the in-use list is reused: it is relabelled with
+ * mid/sd and pushed on the front of the bucket for `mid`.
+ * If the in-use list is empty there is no node to reuse and the call
+ * only takes and releases the lock.
+ */
+void freeSockWithLock(sockPoolHashTable_t *sockhash, unsigned int mid, int sd) {
+ int key = mid&(sockhash->mask);
+ socknode_t *ptr;
+ Lock(&sockhash->mylock);
+ ptr = sockhash->inuse;
+ if (ptr != NULL) { //nothing checked out otherwise: avoid dereferencing NULL
+ sockhash->inuse = ptr->next;
+ ptr->mid = mid;
+ ptr->sd = sd;
+ ptr->next = sockhash->table[key];
+ sockhash->table[key] = ptr;
+ }
+ UnLock(&sockhash->mylock);
+}
+
+#if 0
+/***************************************
+ * Array Implementation for socket reuse
+ ****************************************/
+
+int num_machines;
+
+/* Builds the array-based pool: one sock_pool_t per machine, each with
+ * MAX_CONN_PER_MACHINE descriptor slots (initialised to -1) and in-use flags.
+ * mid: machine ids, one per machine
+ * machines: number of entries in mid; also stored in num_machines
+ * returns: the pool array, or NULL if an allocation fails (memory
+ * allocated before the failure is not released)
+ */
+sock_pool_t *initSockPool(unsigned int *mid, int machines) {
+ sock_pool_t *sockpool;
+ num_machines = machines;
+ //calloc reports failure with NULL; a "< 0" test on the pointer never fires
+ if ((sockpool = calloc(num_machines, sizeof(sock_pool_t))) == NULL) {
+ printf("%s(), Calloc error at %s, line %d\n", __func__, __FILE__, __LINE__);
+ return NULL;
+ }
+ int i;
+ for (i = 0; i < num_machines; i++) {
+ if ((sockpool[i].sd = calloc(MAX_CONN_PER_MACHINE, sizeof(int))) == NULL) {
+ printf("%s(), Calloc error at %s, line %d\n", __func__, __FILE__, __LINE__);
+ return NULL;
+ }
+ if ((sockpool[i].inuse = calloc(MAX_CONN_PER_MACHINE, sizeof(char))) == NULL) {
+ printf("%s(), Calloc error at %s, line %d\n", __func__, __FILE__, __LINE__);
+ return NULL;
+ }
+ sockpool[i].mid = mid[i];
+ int j;
+ for(j = 0; j < MAX_CONN_PER_MACHINE; j++) {
+ sockpool[i].sd[j] = -1;
+ }
+ }
+
+ return sockpool;
+}
+
+int getSock(sock_pool_t *sockpool, unsigned int mid) { /* Array-pool lookup (this code sits inside #if 0).
+ * Finds the entry for machine `mid` and returns the first open descriptor that is
+ * not in use, or opens a new connection in the first empty slot (-1).
+ * Returns the descriptor, or -1 if the machine is unknown, all slots are busy,
+ * or socket()/connect() fails. */
+ int i;
+ for (i = 0; i < num_machines; i++) {
+ if (sockpool[i].mid == mid) {
+ int j;
+ for (j = 0; j < MAX_CONN_PER_MACHINE; j++) {
+ if (sockpool[i].sd[j] != -1 && (sockpool[i].inuse[j] == 0)) { // open and idle: reuse it
+ sockpool[i].inuse[j] = 1;
+ return sockpool[i].sd[j];
+ }
+ if (sockpool[i].sd[j] == -1) { // empty slot: connect and store the descriptor here
+ //Open Connection
+ int sd;
+ if((sd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ printf("%s() Error: In creating socket at %s, %d\n", __func__, __FILE__, __LINE__);
+ return -1;
+ }
+ struct sockaddr_in remoteAddr;
+ bzero(&remoteAddr, sizeof(remoteAddr));
+ remoteAddr.sin_family = AF_INET;
+ remoteAddr.sin_port = htons(LISTEN_PORT);
+ remoteAddr.sin_addr.s_addr = htonl(mid);
+
+ if(connect(sd, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
+ printf("%s(): Error %d connecting to %s:%d\n", __func__, errno, inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
+ close(sd);
+ return -1;
+ }
+ sockpool[i].sd[j] = sd;
+ sockpool[i].inuse[j] = 1;
+ return sockpool[i].sd[j];
+ }
+ }
+ printf("%s()->Error: Less number of MAX_CONN_PER_MACHINE\n", __func__); // every slot is open and in use
+ return -1;
+ }
+ }
+ printf("%s()-> Error: Machine id not found\n", __func__);
+
+ return -1;
+}
+
+/* Array-pool release (this code sits inside #if 0): clears the in-use
+ * flag of the first slot holding descriptor sd.
+ * returns: 0 when the descriptor was found, -1 (after logging) otherwise
+ */
+int freeSock(sock_pool_t *sockpool, int sd) {
+ int machine, slot;
+ for (machine = 0; machine < num_machines; machine++) {
+ for (slot = 0; slot < MAX_CONN_PER_MACHINE; slot++) {
+ if (sockpool[machine].sd[slot] != sd)
+ continue;
+ sockpool[machine].inuse[slot] = 0;
+ return 0;
+ }
+ }
+
+ printf("%s() Error: Illegal socket descriptor %d\n", __func__, sd);
+ return -1;
+}
+
+#endif
--- /dev/null
+#ifndef _SOCKPOOL_H_
+#define _SOCKPOOL_H_
+
+#include "dstm.h"
+#include "ip.h"
+#define DEFAULTSOCKPOOLSIZE 128
+
+int test_and_set(volatile unsigned int *addr);
+void UnLock(volatile unsigned int *addr);
+
+typedef struct socknode { // one pooled connection
+ int sd; // socket descriptor of the connection
+ unsigned int mid; // machine id the socket is connected to
+ struct socknode *next; // next node in the same bucket chain or in the in-use list
+} socknode_t;
+
+typedef struct sockPoolHashTable { // socket pool keyed by machine id
+ socknode_t **table; // array of `size` bucket heads; bucket index is mid & mask
+ socknode_t *inuse; // nodes taken out of the table by getSock()/getSockWithLock()
+ unsigned int size; // number of buckets
+ unsigned int mask; // size - 1, set by createSockPool()
+ volatile unsigned int mylock; // spin-lock word for the *WithLock functions; 0 means free
+} sockPoolHashTable_t;
+
+void addSockWithLock(sockPoolHashTable_t *sockhash, socknode_t *ptr); // push a node on its bucket, under the lock
+sockPoolHashTable_t *createSockPool(sockPoolHashTable_t *, unsigned int); // first argument is overwritten; returns the new pool or NULL
+int getSock(sockPoolHashTable_t *, unsigned int); // check a socket out (node moves to the in-use list)
+int getSock2(sockPoolHashTable_t *, unsigned int); // look a socket up, node stays in the table
+int getSock2WithLock(sockPoolHashTable_t *h, unsigned int); // as getSock2, taking the pool lock
+int getSockWithLock(sockPoolHashTable_t *, unsigned int); // as getSock, taking the pool lock
+void freeSock(sockPoolHashTable_t *, unsigned int, int); // move one in-use node back into the table
+void freeSockWithLock(sockPoolHashTable_t *, unsigned int, int); // as freeSock, taking the pool lock
+void insToList(sockPoolHashTable_t *, socknode_t *); // NOTE(review): no definition appears in sockpool.c as shown -- confirm
+void insToListWithLock(sockPoolHashTable_t *, socknode_t *); // push a node on the in-use list, under the lock
+int createNewSocket(unsigned int); // connect to a machine; returns the descriptor or -1
+
+#if 0
+/************************************************
+ * Array Implementation data structures
+ ***********************************************/
+#define MAX_CONN_PER_MACHINE 10
+typedef struct sock_pool { // per-machine entry of the disabled (#if 0) array pool
+ unsigned int mid; // machine id this entry belongs to
+ int *sd; // MAX_CONN_PER_MACHINE descriptors; -1 marks an empty slot
+ char *inuse; // MAX_CONN_PER_MACHINE flags; 1 while the matching descriptor is handed out
+} sock_pool_t;
+
+sock_pool_t *initSockPool(unsigned int *, int);
+int getSock(sock_pool_t *, unsigned int);
+int freeSock(sock_pool_t *, int);
+#endif
+
+#endif
--- /dev/null
+#include "stmlock.h"
+#include <stdio.h>
+
+
+/* Puts the lock word at addr into the unlocked state (RW_LOCK_BIAS). */
+inline void initdsmlocks(volatile unsigned int *addr) {
+ addr[0] = RW_LOCK_BIAS;
+}
+
+int write_trylock(volatile unsigned int *lock) { /* Non-blocking attempt to take the write lock.
+ * Atomically swaps 0 into *lock and returns the value that was there before:
+ * non-zero means the lock was free and is now held by the caller,
+ * 0 means it was already held. */
+ int retval=0; // value exchanged into the lock word
+ __asm__ __volatile__("xchgl %0,%1"
+ : "=r"(retval)
+ : "m"(*__xg(lock)), "0"(retval)
+ : "memory");
+ return retval;
+}
+
+void write_unlock(volatile unsigned int *lock) { /* Releases the write lock by storing 1 into *lock. */
+ __asm __volatile__("movl $1, %0" : "+m" (*__xg(lock))::"memory"); // the literal 1 equals RW_LOCK_BIAS in stmlock.h
+}
+
--- /dev/null
+#ifndef _STMLOCK_H_
+#define _STMLOCK_H_
+
+#define RW_LOCK_BIAS 1 // value of an unlocked lock word (written by initdsmlocks)
+#define LOCK_UNLOCKED { LOCK_BIAS } // NOTE(review): LOCK_BIAS is not defined in this header, only RW_LOCK_BIAS -- confirm
+
+struct __xchg_dummy { // dummy type that is only ever used through the __xg() cast
+ unsigned long a[100];
+};
+
+#define __xg(x) ((struct __xchg_dummy *)(x)) // casts a lock address for the "m" operands of the inline asm in stmlock.c
+
+void initdsmlocks(volatile unsigned int *addr);
+int write_trylock(volatile unsigned int *lock);
+void write_unlock(volatile unsigned int *lock);
+
+/*
+static inline void initdsmlocks(volatile unsigned int *addr) {
+ (*addr) = RW_LOCK_BIAS;
+}
+
+static inline int write_trylock(volatile unsigned int *lock) {
+ int retval=0;
+ __asm__ __volatile__("xchgl %0,%1"
+ : "=r"(retval)
+ : "m"(*__xg(lock)), "0"(retval)
+ : "memory");
+ return retval;
+}
+
+static inline void write_unlock(volatile unsigned int *lock) {
+ __asm __volatile__("movl $1, %0" : "+m" (*__xg(lock))::"memory");
+}
+*/
+
+#endif
--- /dev/null
+#include "stmlookup.h"
+#include "strings.h"
+
+__thread chashlistnode_t *c_table; // per-thread bucket array; indexed elsewhere as (key & c_mask)>>4
+__thread chashlistnode_t *c_list; // head of the lnext chain that alttraverseCache walks
+__thread unsigned int c_size; // presumably the bucket count, as dc_c_size below -- TODO confirm
+__thread unsigned INTPTR c_mask; // mask applied to the key before the >>4 shift
+__thread unsigned int c_numelements; // number of cached objects; used to size the commit arrays
+__thread unsigned int c_threshold; // presumably the resize threshold, as dc_c_threshold below -- TODO confirm
+__thread double c_loadfactor; // presumably the load factor behind c_threshold -- TODO confirm
+__thread cliststruct_t *c_structs; // presumably the overflow-node chunks, as dc_c_structs below -- TODO confirm
+
+#ifdef DELAYCOMP
+__thread chashlistnode_t *dc_c_table; // per-thread bucket array of dc_c_size nodes
+__thread chashlistnode_t *dc_c_list; // head of the lnext chain linking every inserted node
+__thread unsigned int dc_c_size; // number of buckets
+__thread unsigned INTPTR dc_c_mask; // (size << 4)-1; bucket index is (key & mask)>>4
+__thread unsigned int dc_c_numelements; // number of keys currently stored
+__thread unsigned int dc_c_threshold; // size*loadfactor; exceeding it triggers a resize on insert
+__thread double dc_c_loadfactor; // load factor given to dc_t_chashCreate
+__thread cliststruct_t *dc_c_structs; // chunks that supply the nodes for colliding keys
+
+/* Initialises the calling thread's delayed-computation hash table.
+ * size: number of buckets; the mask is (size << 4)-1, so this only
+ * selects buckets correctly when size is a power of two
+ * loadfactor: fraction of size above which an insert triggers a resize
+ * An allocation failure is reported on stdout; the table pointers are then
+ * NULL and the table must not be used.
+ */
+void dc_t_chashCreate(unsigned int size, double loadfactor) {
+ // Allocate space for the hash table
+
+ dc_c_table = calloc(size, sizeof(chashlistnode_t));
+ dc_c_loadfactor = loadfactor;
+ dc_c_size = size;
+ dc_c_threshold=size*loadfactor;
+ dc_c_mask = (size << 4)-1;
+ dc_c_structs=calloc(1, sizeof(cliststruct_t));
+ dc_c_numelements = 0; // Initial number of elements in the hash
+ dc_c_list=NULL;
+
+ //same diagnostic as dc_t_chashResize uses for a failed calloc
+ if (dc_c_table == NULL || dc_c_structs == NULL) {
+ printf("Calloc error %s %d\n", __FILE__, __LINE__);
+ }
+}
+
+/* Empties the calling thread's delayed-computation hash table for reuse.
+ * A sparsely filled table (fewer than size/16 elements) is cleared by
+ * walking the element list and resetting only the nodes that live in the
+ * bucket array. Otherwise the whole bucket array is zeroed. All overflow
+ * chunks except the last one in the chain are freed, and the counters and
+ * the element list are reset.
+ */
+void dc_t_chashreset() {
+ chashlistnode_t *first = dc_c_table;
+
+ if (dc_c_numelements>=(dc_c_size>>4)) {
+ bzero(dc_c_table, sizeof(chashlistnode_t)*dc_c_size);
+ } else {
+ chashlistnode_t *limit=&first[dc_c_size];
+ chashlistnode_t *node;
+ chashlistnode_t *following;
+ for(node=dc_c_list; node!=NULL; node=following) {
+ following=node->lnext;
+ //nodes outside the bucket array belong to the overflow chunks
+ if (node<first||node>=limit)
+ continue;
+ //zero in list
+ node->key=0;
+ node->next=NULL;
+ }
+ }
+ for(;;) {
+ cliststruct_t *older=dc_c_structs->next;
+ if (older==NULL)
+ break;
+ free(dc_c_structs);
+ dc_c_structs=older;
+ }
+ dc_c_structs->num = 0;
+ dc_c_numelements = 0;
+ dc_c_list=NULL;
+}
+
+//Store objects and their pointers into hash
+//Inserts key -> val unless the key is already present; a NULL key is ignored.
+//Every inserted node is also pushed on the front of dc_c_list.
+void dc_t_chashInsertOnce(void * key, void *val) {
+ chashlistnode_t *ptr;
+
+ if (key==NULL)
+ return;
+
+ if(dc_c_numelements > (dc_c_threshold)) {
+ //Resize
+ unsigned int newsize = dc_c_size << 1;
+ dc_t_chashResize(newsize); // return value (1 on calloc failure) is not checked
+ }
+
+ ptr = &dc_c_table[(((unsigned INTPTR)key)&dc_c_mask)>>4];
+
+ if(ptr->key==0) { // bucket head is free: store in place
+ ptr->key=key;
+ ptr->val=val;
+ ptr->lnext=dc_c_list;
+ dc_c_list=ptr;
+ dc_c_numelements++;
+ } else { // Insert in the beginning of linked list
+ chashlistnode_t * node;
+ chashlistnode_t *search=ptr;
+
+ //make sure it isn't here
+ do {
+ if(search->key == key) {
+ return;
+ }
+ search=search->next;
+ } while(search != NULL);
+
+ dc_c_numelements++;
+ if (dc_c_structs->num<NUMCLIST) { // room left in the current chunk
+ node=&dc_c_structs->array[dc_c_structs->num];
+ dc_c_structs->num++;
+ } else {
+ //get new list
+ cliststruct_t *tcl=calloc(1,sizeof(cliststruct_t)); // result is not checked for NULL
+ tcl->next=dc_c_structs;
+ dc_c_structs=tcl;
+ node=&tcl->array[0];
+ tcl->num=1;
+ }
+ node->key = key;
+ node->val = val;
+ node->next = ptr->next; // link directly behind the bucket head
+ ptr->next=node;
+ node->lnext=dc_c_list;
+ dc_c_list=node;
+ }
+}
+
+/* Rehash the DELAYCOMP table into a freshly allocated array of newsize
+ * buckets and rebuild dc_c_list.
+ *
+ * Returns 0 on success. Returns 1 when the new array cannot be allocated; in
+ * that case the existing table, including dc_c_list, is left untouched.
+ *
+ * NOTE: an old bucket head is always copied into an empty new bucket. The
+ * original code comments state that a head never meets an occupied new bucket
+ * "because of the way things rehash". If that ever changes, such a head must
+ * be copied into a newly allocated node rather than relinked, because the old
+ * array is freed at the end of this function.
+ */
+unsigned int dc_t_chashResize(unsigned int newsize) {
+  chashlistnode_t *node, *ptr, *curr; // curr walks the chain of each old bucket
+  unsigned int oldsize;
+  unsigned int i,index;
+  unsigned int mask;
+
+  if((node = calloc(newsize, sizeof(chashlistnode_t))) == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  // Only commit to the new table once the allocation has succeeded
+  ptr = dc_c_table;
+  oldsize = dc_c_size;
+  dc_c_list=NULL;
+  dc_c_table = node; //Update the global hashtable upon resize()
+  dc_c_size = newsize;
+  dc_c_threshold = newsize * dc_c_loadfactor;
+  mask=dc_c_mask = (newsize << 4)-1;
+
+  for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+    curr = &ptr[i];
+    do { //Inner loop to go through linked lists
+      void * key;
+      chashlistnode_t *tmp,*next;
+
+      if ((key=curr->key) == 0) { //Exit inner loop if the element is empty
+        break; //key = val = 0 for an element that is not present
+      }
+      index = (((unsigned INTPTR)key) & mask) >>4;
+      tmp=&node[index];
+      next = curr->next;
+      // Insert into the new table
+      if(tmp->key == 0) {
+        // Empty new bucket: copy the entry into the head slot
+        tmp->key = key;
+        tmp->val = curr->val;
+        tmp->lnext=dc_c_list;
+        dc_c_list=tmp;
+      } else {
+        // Occupied new bucket: relink the existing chain node behind the head
+        curr->next=tmp->next;
+        tmp->next=curr;
+        curr->lnext=dc_c_list;
+        dc_c_list=curr;
+      }
+
+      curr = next;
+    } while(curr!=NULL);
+  }
+
+  free(ptr); //Free the memory of the old hash table
+  return 0;
+}
+
+/* Free every pool block and the bucket array of the DELAYCOMP table, then
+ * clear the table, pool and list pointers. The size and count variables are
+ * left as they were.
+ */
+void dc_t_chashDelete() {
+  cliststruct_t *ptr=dc_c_structs;
+  while(ptr!=NULL) {
+    cliststruct_t *next=ptr->next;
+    free(ptr);
+    ptr=next;
+  }
+  free(dc_c_table);
+  dc_c_table=NULL;
+  dc_c_structs=NULL;
+  dc_c_list=NULL;
+}
+
+/* Look up key in the DELAYCOMP table.
+ * Returns the val of the first entry in the bucket whose key equals key, or
+ * NULL when the chain holds no such entry. A NULL key compares equal to an
+ * empty bucket head (key 0), in which case that head's val is returned.
+ */
+INLINE void * dc_t_chashSearch(void * key) {
+  // The bucket index is computed in place rather than through a helper so
+  // that the whole lookup can be inlined.
+  unsigned INTPTR slot = (((unsigned INTPTR)key) & dc_c_mask)>>4;
+  chashlistnode_t *entry;
+
+  for (entry = &dc_c_table[slot]; entry != NULL; entry = entry->next) {
+    if (entry->key == key)
+      return entry->val;
+  }
+  return NULL;
+}
+
+#endif
+
+/* Set up the calling thread's hash table.
+ *
+ * size       - number of bucket heads to allocate; it should be a power of two
+ *              so that the mask (size << 4) - 1 can select every bucket.
+ * loadfactor - fraction of size that may be filled before an insert resizes.
+ *
+ * Allocates the zeroed bucket array and the first pool block for collision
+ * nodes. The allocations are not checked here.
+ */
+void t_chashCreate(unsigned int size, double loadfactor) {
+  // Allocate space for the hash table
+  c_table = calloc(size, sizeof(chashlistnode_t));
+  c_loadfactor = loadfactor;
+  c_size = size;
+  c_threshold = size*loadfactor;
+  // Keys are masked and then shifted right by 4 to form the bucket index
+  c_mask = (size << 4)-1;
+  c_structs = calloc(1, sizeof(cliststruct_t));
+  c_numelements = 0; // Initial number of elements in the hash
+  c_list = NULL;
+}
+
+/* Empty the calling thread's hash table while keeping the bucket array and
+ * exactly one pool block for reuse.
+ */
+void t_chashreset() {
+  chashlistnode_t *ptr = c_table;
+
+  if (c_numelements<(c_size>>4)) {
+    // Sparsely filled: walk the list of live entries and clear only those
+    // that are bucket heads inside the array, instead of zeroing everything.
+    chashlistnode_t *top=&ptr[c_size];
+    chashlistnode_t *tmpptr=c_list;
+    while(tmpptr!=NULL) {
+      chashlistnode_t *next=tmpptr->lnext;
+      if (tmpptr>=ptr&&tmpptr<top) {
+        //zero in list
+        tmpptr->key=0;
+        tmpptr->next=NULL;
+      }
+      tmpptr=next;
+    }
+  } else {
+    bzero(c_table, sizeof(chashlistnode_t)*c_size);
+  }
+  // Release every pool block except the last one in the chain
+  while(c_structs->next!=NULL) {
+    cliststruct_t *next=c_structs->next;
+    free(c_structs);
+    c_structs=next;
+  }
+  c_structs->num = 0;
+  c_numelements = 0;
+  c_list=NULL;
+}
+
+/* Insert (key, val) into the calling thread's hash table. No duplicate check
+ * is made. The table is doubled first when the element count exceeds the
+ * threshold. The entry is also pushed onto c_list.
+ */
+void t_chashInsert(void * key, void *val) {
+  chashlistnode_t *bucket;
+  chashlistnode_t *fresh;
+
+  if (c_numelements > c_threshold)
+    t_chashResize(c_size << 1);
+
+  bucket = &c_table[(((unsigned INTPTR)key)&c_mask)>>4];
+  c_numelements++;
+
+  if (bucket->key==0) {
+    // Empty bucket: the head slot itself holds the entry
+    bucket->key=key;
+    bucket->val=val;
+    bucket->lnext=c_list;
+    c_list=bucket;
+    return;
+  }
+
+  if (c_structs->num>=NUMCLIST) {
+    // Current pool block is exhausted; start a new zeroed one
+    cliststruct_t *block=calloc(1,sizeof(cliststruct_t));
+    block->next=c_structs;
+    c_structs=block;
+  }
+  fresh=&c_structs->array[c_structs->num];
+  c_structs->num++;
+
+  // Link directly behind the bucket head and onto the live-entry list
+  fresh->key=key;
+  fresh->val=val;
+  fresh->next=bucket->next;
+  bucket->next=fresh;
+  fresh->lnext=c_list;
+  c_list=fresh;
+}
+
+/* Look up key in the calling thread's hash table.
+ * Returns the val of the first entry in the bucket whose key equals key, or
+ * NULL when the chain holds no such entry. A NULL key compares equal to an
+ * empty bucket head (key 0), in which case that head's val is returned.
+ */
+INLINE void * t_chashSearch(void * key) {
+  // The bucket index is computed in place rather than through a helper so
+  // that the whole lookup can be inlined.
+  unsigned INTPTR slot = (((unsigned INTPTR)key) & c_mask)>>4;
+  chashlistnode_t *entry;
+
+  for (entry = &c_table[slot]; entry != NULL; entry = entry->next) {
+    if (entry->key == key)
+      return entry->val;
+  }
+  return NULL;
+}
+
+/* Rehash the calling thread's table into a freshly allocated array of newsize
+ * buckets and rebuild c_list.
+ *
+ * Returns 0 on success. Returns 1 when the new array cannot be allocated; in
+ * that case the existing table, including c_list, is left untouched.
+ *
+ * NOTE: an old bucket head is always copied into an empty new bucket. The
+ * original code comments state that a head never meets an occupied new bucket
+ * "because of the way things rehash". If that ever changes, such a head must
+ * be copied into a newly allocated node rather than relinked, because the old
+ * array is freed at the end of this function.
+ */
+unsigned int t_chashResize(unsigned int newsize) {
+  chashlistnode_t *node, *ptr, *curr; // curr walks the chain of each old bucket
+  unsigned int oldsize;
+  unsigned int i,index;
+  unsigned int mask;
+
+  if((node = calloc(newsize, sizeof(chashlistnode_t))) == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  // Only commit to the new table once the allocation has succeeded
+  ptr = c_table;
+  oldsize = c_size;
+  c_list=NULL;
+  c_table = node; //Update the global hashtable upon resize()
+  c_size = newsize;
+  c_threshold = newsize * c_loadfactor;
+  mask=c_mask = (newsize << 4)-1;
+
+  for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table
+    curr = &ptr[i];
+    do { //Inner loop to go through linked lists
+      void * key;
+      chashlistnode_t *tmp,*next;
+
+      if ((key=curr->key) == 0) { //Exit inner loop if the element is empty
+        break; //key = val = 0 for an element that is not present
+      }
+      index = (((unsigned INTPTR)key) & mask) >>4;
+      tmp=&node[index];
+      next = curr->next;
+      // Insert into the new table
+      if(tmp->key == 0) {
+        // Empty new bucket: copy the entry into the head slot
+        tmp->key = key;
+        tmp->val = curr->val;
+        tmp->lnext=c_list;
+        c_list=tmp;
+      } else {
+        // Occupied new bucket: relink the existing chain node behind the head
+        curr->next=tmp->next;
+        tmp->next=curr;
+        curr->lnext=c_list;
+        c_list=curr;
+      }
+
+      curr = next;
+    } while(curr!=NULL);
+  }
+
+  free(ptr); //Free the memory of the old hash table
+  return 0;
+}
+
+/* Free every pool block and the bucket array of the calling thread's table,
+ * then clear the table, pool and list pointers. The size and count variables
+ * are left as they were.
+ */
+void t_chashDelete() {
+  cliststruct_t *ptr=c_structs;
+  while(ptr!=NULL) {
+    cliststruct_t *next=ptr->next;
+    free(ptr);
+    ptr=next;
+  }
+  free(c_table);
+  c_table=NULL;
+  c_structs=NULL;
+  c_list=NULL;
+}
--- /dev/null
+#ifndef _CLOOKUP_H_
+#define _CLOOKUP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef INTPTR
+#ifdef BIT64
+#define INTPTR long // integer type used (as "unsigned INTPTR") to hold pointer bits when BIT64 is defined
+#else
+#define INTPTR int // integer type used (as "unsigned INTPTR") to hold pointer bits otherwise
+#endif
+#endif
+
+#define CLOADFACTOR 0.25 // NOTE(review): presumably the default load factor passed to the create functions - confirm at call sites
+#define CHASH_SIZE 1024 // NOTE(review): presumably the default initial bucket count - confirm at call sites
+
+#define INLINE inline __attribute__((always_inline)) // forces inlining of the search routines that use it
+
+
+typedef struct chashlistnode { // one hash entry: either a bucket head inside the table array or a chained pool node
+ void * key; // lookup key; 0 marks an empty bucket head
+ void * val; //this can be cast to another type or used to point to a larger structure
+ struct chashlistnode *next; // next entry in the same bucket's collision chain
+ struct chashlistnode *lnext; // next entry on the table-wide list of live entries (c_list / dc_c_list)
+} chashlistnode_t;
+
+typedef struct chashtable { // table descriptor; NOTE(review): the t_chash* routines shown here use the thread-local c_* variables instead - confirm whether this type is still used
+ chashlistnode_t *table; // points to beginning of hash table
+ unsigned int size; // number of bucket heads
+ unsigned int mask; // NOTE(review): c_mask is declared unsigned INTPTR while this field is unsigned int - confirm intended width
+ unsigned int numelements; // number of stored entries
+ unsigned int threshold; // entry count that triggers a resize
+ double loadfactor; // fraction of size allowed before resizing
+} chashtable_t;
+
+#define NUMCLIST 250 // collision nodes per pool block
+typedef struct clist { // pool block from which collision-chain nodes are handed out
+ struct chashlistnode array[NUMCLIST]; // node storage
+ int num; // number of nodes already handed out from array
+ struct clist *next; // previously allocated pool block, or NULL
+} cliststruct_t;
+
+
+void t_chashCreate(unsigned int size, double loadfactor);
+void t_chashInsert(void * key, void *val);
+void * t_chashSearch(void * key);
+unsigned int t_chashResize(unsigned int newsize);
+void t_chashDelete();
+void t_chashreset();
+
+
+extern __thread chashlistnode_t *c_table;
+extern __thread chashlistnode_t *c_list;
+extern __thread unsigned int c_size;
+extern __thread unsigned INTPTR c_mask;
+extern __thread unsigned int c_numelements;
+extern __thread unsigned int c_threshold;
+extern __thread double c_loadfactor;
+extern __thread cliststruct_t *c_structs;
+
+#ifdef DELAYCOMP
+extern __thread chashlistnode_t *dc_c_table;
+extern __thread chashlistnode_t *dc_c_list;
+extern __thread unsigned int dc_c_size;
+extern __thread unsigned INTPTR dc_c_mask;
+extern __thread unsigned int dc_c_numelements;
+extern __thread unsigned int dc_c_threshold;
+extern __thread double dc_c_loadfactor;
+extern __thread cliststruct_t *dc_c_structs;
+
+void dc_t_chashCreate(unsigned int size, double loadfactor);
+void dc_t_chashInsertOnce(void * key, void *val);
+void * dc_t_chashSearch(void * key);
+unsigned int dc_t_chashResize(unsigned int newsize);
+void dc_t_chashDelete();
+void dc_t_chashreset();
+#endif
+
+#endif
--- /dev/null
+#include "threadnotify.h"
+
+notifyhashtable_t nlookup; //Global hash table
+
+/* Prepend a node for (threadid, mid) to the list of threads waiting for an
+ * update notification from one object, and return the new list head.
+ * An empty list is passed as head == NULL; the new node then ends the list. */
+threadlist_t *insNode(threadlist_t *head, unsigned int threadid, unsigned int mid) {
+  threadlist_t *fresh = malloc(sizeof(threadlist_t));
+
+  fresh->threadid = threadid;
+  fresh->mid = mid;
+  // With head == NULL this stores NULL, matching the empty-list case
+  fresh->next = head;
+  return fresh;
+}
+
+/* Print the thread id and machine id of every node in the list of threads
+ * waiting on update notification from an object, or a notice when the list
+ * is empty. Both fields are unsigned and are printed with %u. */
+void display(threadlist_t *head) {
+  threadlist_t *ptr;
+
+  if(head == NULL) {
+    printf("No thread is waiting\n");
+    return;
+  }
+  for(ptr = head; ptr != NULL; ptr = ptr->next) {
+    printf("The threadid waiting is = %u\n", ptr->threadid);
+    printf("The mid on which thread present = %u\n", ptr->mid);
+  }
+}
+
+/* Create the global hash table that maps a threadid to a pointer to its
+ * thread notify data.
+ *
+ * size       - number of bucket heads to allocate
+ * loadfactor - fill ratio above which notifyhashInsert resizes the table
+ *
+ * Returns 0 on success and 1 if the bucket array cannot be allocated, in
+ * which case nlookup is left unmodified. */
+unsigned int notifyhashCreate(unsigned int size, float loadfactor) {
+  notifylistnode_t *nodes = calloc(size, sizeof(notifylistnode_t));
+
+  if (nodes == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+  nlookup.table = nodes;
+  nlookup.size = size;
+  nlookup.numelements = 0; // Initial number of elements in the hash
+  nlookup.loadfactor = loadfactor;
+  //Initialize the pthread_mutex variable
+  pthread_mutex_init(&nlookup.locktable, NULL);
+  return 0;
+}
+
+// Map a thread id to its bin: the id modulo the current table size
+unsigned int notifyhashFunction(unsigned int tid) {
+  unsigned int bins = nlookup.size;
+  return tid % bins;
+}
+
+/* Insert or update the mapping tid -> ndata in the global notify table.
+ *
+ * The table lock is held for the whole operation, so the bucket pointer and
+ * index are always computed against the table that is current after any
+ * resize. numelements is incremented only when a new entry is created, which
+ * keeps it consistent with the decrement in notifyhashRemove and the recount
+ * in notifyhashResize.
+ *
+ * Returns 0 on success and 1 if a chain node cannot be allocated. */
+unsigned int notifyhashInsert(unsigned int tid, notifydata_t *ndata) {
+  notifylistnode_t *bucket, *node, *tmp;
+  int isFound = 0;
+
+  pthread_mutex_lock(&nlookup.locktable);
+  if (nlookup.numelements > (nlookup.loadfactor * nlookup.size)) {
+    //Resize Table
+    notifyhashResize(2 * nlookup.size + 1);
+  }
+  bucket = &nlookup.table[notifyhashFunction(tid)];
+  if(bucket->next == NULL && bucket->threadid == 0) {
+    // Insert at the first position in the hashtable
+    bucket->threadid = tid;
+    bucket->ndata = ndata;
+    nlookup.numelements++;
+  } else {
+    // Update in place when the tid is already present in this bucket
+    for(tmp = bucket; tmp != NULL; tmp = tmp->next) {
+      if(tmp->threadid == tid) {
+        isFound = 1;
+        tmp->ndata = ndata;
+        break;
+      }
+    }
+    if(!isFound) {
+      if ((node = calloc(1, sizeof(notifylistnode_t))) == NULL) {
+        printf("Calloc error %s, %d\n", __FILE__, __LINE__);
+        pthread_mutex_unlock(&nlookup.locktable);
+        return 1;
+      }
+      node->threadid = tid;
+      node->ndata = ndata;
+      node->next = bucket->next;
+      bucket->next = node;
+      nlookup.numelements++;
+    }
+  }
+  pthread_mutex_unlock(&nlookup.locktable);
+
+  return 0;
+}
+
+/* Return the notify data stored for tid, or NULL when tid is not present.
+ * The table pointer, bucket index and result are all read while holding the
+ * table lock so that a concurrent resize cannot invalidate them. */
+notifydata_t *notifyhashSearch(unsigned int tid) {
+  notifydata_t *found = NULL;
+  notifylistnode_t *node;
+
+  pthread_mutex_lock(&nlookup.locktable);
+  for(node = &nlookup.table[notifyhashFunction(tid)]; node != NULL; node = node->next) {
+    if(node->threadid == tid) {
+      found = node->ndata;
+      break;
+    }
+  }
+  pthread_mutex_unlock(&nlookup.locktable);
+  return found;
+}
+
+/* Remove the entry for tid from the global notify table.
+ * Returns 0 when an entry was removed and 1 when tid was not found.
+ * The bucket is located while holding the table lock so that a concurrent
+ * resize cannot leave this function walking a freed table. */
+unsigned int notifyhashRemove(unsigned int tid) {
+  notifylistnode_t *curr, *head, *node;
+  notifylistnode_t *prev = NULL;
+
+  pthread_mutex_lock(&nlookup.locktable);
+  head = &nlookup.table[notifyhashFunction(tid)];
+  for (curr = head; curr != NULL; curr = curr->next) {
+    if (curr->threadid == tid) { // Find a match in the hash table
+      if (nlookup.numelements > 0) {
+        nlookup.numelements--; // The counter is unsigned; never let it wrap around
+      }
+      if (curr == head) {
+        if (curr->next == NULL) { // Only item in the bin: mark the head slot empty
+          curr->threadid = 0;
+          curr->ndata = NULL;
+        } else { // Head with a chain: pull the successor into the head slot
+          node = curr->next;
+          curr->threadid = node->threadid;
+          curr->ndata = node->ndata;
+          curr->next = node->next;
+          free(node);
+        }
+      } else { // Regular delete from linked list; prev is set once past the head
+        prev->next = curr->next;
+        free(curr);
+      }
+      pthread_mutex_unlock(&nlookup.locktable);
+      return 0;
+    }
+    prev = curr;
+  }
+  pthread_mutex_unlock(&nlookup.locktable);
+  return 1;
+}
+
+/* Move every entry of the global notify table into a new array of newsize
+ * bins. It does not take the table lock itself. Returns 0 on success and 1
+ * when an allocation fails. */
+unsigned int notifyhashResize(unsigned int newsize) {
+  notifylistnode_t *oldtable = nlookup.table;
+  unsigned int oldsize = nlookup.size;
+  notifylistnode_t *newtable;
+  int bin;
+
+  newtable = calloc(newsize, sizeof(notifylistnode_t));
+  if(newtable == NULL) {
+    printf("Calloc error %s %d\n", __FILE__, __LINE__);
+    return 1;
+  }
+
+  nlookup.table = newtable; //Update the global hashtable upon resize()
+  nlookup.size = newsize;
+  nlookup.numelements = 0;
+
+  for(bin = 0; bin < oldsize; bin++) { //Each bin of the old table
+    notifylistnode_t *curr = &oldtable[bin];
+    int ishead = 1; // the first node of a bin lives inside the old array
+
+    // Stop at the end of the chain or at a node with threadid 0 (empty slot)
+    while (curr != NULL && curr->threadid != 0) {
+      notifylistnode_t *next = curr->next;
+      int index = notifyhashFunction(curr->threadid);
+      notifylistnode_t *slot = &nlookup.table[index];
+#ifdef DEBUG
+      printf("DEBUG(resize) -> index = %d, threadid = %d\n", index, curr->threadid);
+#endif
+      if(slot->next == NULL && slot->threadid == 0) {
+        // Empty bin in the new table: store directly in the head slot
+        slot->threadid = curr->threadid;
+        slot->ndata = curr->ndata;
+      } else {
+        // Occupied bin: chain a newly allocated node behind the head
+        notifylistnode_t *newnode = calloc(1, sizeof(notifylistnode_t));
+        if(newnode == NULL) {
+          printf("Calloc error %s, %d\n", __FILE__, __LINE__);
+          return 1;
+        }
+        newnode->threadid = curr->threadid;
+        newnode->ndata = curr->ndata;
+        newnode->next = slot->next;
+        slot->next = newnode;
+      }
+      nlookup.numelements++;
+
+      //Chained nodes were allocated individually; the head is freed with the array
+      if (!ishead) {
+        free(curr);
+      }
+
+      ishead = 0;
+      curr = next;
+    }
+  }
+
+  free(oldtable); //Free the memory of the old hash table
+  return 0;
+}
--- /dev/null
+#ifndef _THREADNOTIFY_H_
+#define _THREADNOTIFY_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#define N_LOADFACTOR 0.5
+#define N_HASH_SIZE 20
+
+//Structure to notify object of which other objects/threads are waiting on it
+typedef struct threadlist { // singly linked list node; insNode prepends new nodes
+ unsigned int threadid; // id of the waiting thread
+ unsigned int mid; // id of the machine on which that thread is present
+ struct threadlist *next; // next waiter, or NULL at the end of the list
+} threadlist_t;
+
+//Structure for objects involved in wait-notify call
+typedef struct notifydata {
+ unsigned int numoid; /* Number of oids on which we are waiting for updated notification */
+ unsigned int threadid; /* The threadid that is waiting for update notification response*/
+ unsigned int *oidarry; /* Pointer to array of oids that this threadid is waiting on*/
+ unsigned short *versionarry; /* Pointer to array of versions of the oids that we are waiting on */
+ pthread_cond_t threadcond; /* Cond variable associated with each threadid that needs to be signaled*/
+ pthread_mutex_t threadnotify; /* NOTE(review): presumably the mutex paired with threadcond - confirm against the wait/signal code */
+} notifydata_t;
+
+typedef struct notifylistnode { // one entry of the notify hash table: a bucket head in the array or a chained node
+ unsigned int threadid; // key; 0 together with next == NULL marks an empty bucket head
+ notifydata_t *ndata; // notify data registered for threadid
+ struct notifylistnode *next; // next entry in the same bucket
+} notifylistnode_t;
+
+typedef struct notifyhashtable {
+ notifylistnode_t *table; //Points to beginning of hash table
+ unsigned int size; // number of bucket heads; notifyhashFunction takes the tid modulo this
+ unsigned int numelements; // entry count compared against loadfactor * size before an insert
+ float loadfactor; // fill ratio above which notifyhashInsert resizes
+ pthread_mutex_t locktable; //Lock for the hashtable
+} notifyhashtable_t;
+
+threadlist_t *insNode(threadlist_t *head, unsigned int threadid, unsigned int mid); //Inserts nodes for one object that
+//needs to send notification to threads waiting on it
+void display(threadlist_t *head); // Displays linked list of nodes for one object
+unsigned int notifyhashCreate(unsigned int size, float loadfactor); //returns 1 if hashtable creation is not successful
+unsigned int notifyhashFunction(unsigned int tid); //returns index in the hash table
+unsigned int notifyhashInsert(unsigned int tid, notifydata_t *ndata); //returns 1 if insert not successful
+notifydata_t *notifyhashSearch(unsigned int tid); //returns pointer to notify data, NULL if not found
+unsigned int notifyhashRemove(unsigned int tid); //returns 1 if not successful
+unsigned int notifyhashResize(unsigned int newsize);
+
+#endif
--- /dev/null
+#ifndef _TM_H_
+#define _TM_H_
+#include "runtime.h"
+/* ==================
+ * Control Messages
+ * ==================
+ */
+#define TRANS_SOFT_ABORT 12
+#define TRANS_ABORT 13
+#define TRANS_COMMIT 14
+
+
+/* ========================
+ * Library header files
+ * ========================
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <errno.h>
+#include "stmlookup.h"
+#include "stmlock.h"
+
+/* ==================================
+ * Bit designation for status field
+ * of object header
+ * ==================================
+ */
+#define DIRTY 0x01
+#define NEW 0x02
+#define LOCK 0x04
+
+#ifdef COMPILER
+#include "structdefs.h"
+
+typedef struct threadrec { // record that an object header can point at through objheader_t.trec
+ int blocked; // NOTE(review): presumably nonzero while the owning thread is blocked - confirm against users
+} threadrec_t;
+
+typedef struct objheader { // header placed directly in front of each object (the accessor macros add sizeof(objheader_t) to reach the object)
+ unsigned int version; // NOTE(review): presumably the object's version used for conflict detection - confirm
+ unsigned int lock; /* reader and writer lock for object header */
+#ifdef STMSTATS
+ int abortCount; /* track how many times does this object cause abort */
+ int accessCount; /* track how many times is this object accessed */
+ threadrec_t *trec; /* some thread that locked this object */
+ int riskyflag; /* track how risky is the object */
+ pthread_mutex_t *objlock; /* lock this object */
+ int padding; // NOTE(review): presumably keeps the header size aligned - confirm
+#endif
+} objheader_t;
+
+#define OID(x) /* x points at an objheader_t; yields the ___objlocation___ field of the object that follows the header, as a void* lvalue */ \
+ (*((void **)&((struct ___Object___ *)(((char *) x) + sizeof(objheader_t)))->___objlocation___))
+
+#define COMPOID(x) /* x points at the object itself (no header offset); yields its ___objlocation___, or NULL when x is NULL */ \
+ ((((void *) x )!=NULL) ? (*((void **)&((struct ___Object___ *) x)->___objlocation___)) : NULL)
+
+#define STATUS(x) /* x points at an objheader_t; yields the object's ___objstatus___ as an unsigned int lvalue (expansion is not parenthesized) */ \
+ *((unsigned int *) &(((struct ___Object___ *)(((char *) x) + sizeof(objheader_t)))->___objstatus___))
+
+#define STATUSPTR(x) /* x points at an objheader_t; yields the address of the object's ___objstatus___ */ \
+ ((unsigned int *) &(((struct ___Object___ *)(((char *) x) + sizeof(objheader_t)))->___objstatus___))
+
+#define TYPE(x) /* x points at an objheader_t; yields the type field of the object that follows the header */ \
+ ((struct ___Object___ *)((char *) x + sizeof(objheader_t)))->type
+
+#define GETSIZE(size, x) { /* stores into size the byte size of the object behind header x */ \
+ int type=TYPE(x); \
+ if (type<NUMCLASSES) { /* plain class: size comes straight from the classsize table */ \
+ size=classsize[type]; \
+ } else { /* otherwise treated as an array: per-element size times ___length___ plus the ArrayObject header */ \
+ size=classsize[type]*((struct ArrayObject *)&((objheader_t *)x)[1])->___length___+sizeof(struct ArrayObject); \
+ } \
+}
+
+#else
+#define OID(x) x->oid
+#define TYPE(x) x->type
+#define STATUS(x) x->status
+#define STATUSPTR(x) &x->status
+#define GETSIZE(size, x) size=classsize[TYPE(x)]
+#endif
+
+
+/* ================================
+ * Constants
+ * ================================
+ */
+#define DEFAULT_OBJ_STORE_SIZE 1048510 //roughly 1MB; NOTE(review): 2^20 is 1048576 - confirm that 1048510 is intended
+#define MAXABORTS 2
+#define NEED_LOCK_THRESHOLD 0.020000
+#define OSUSED(x) (((unsigned INTPTR)(x)->top)-((unsigned INTPTR) (x+1))) // bytes between the end of the objstr_t header (x+1) and x->top
+#define OSFREE(x) ((x)->size-OSUSED(x)) // bytes still unused: size minus OSUSED
+#define TRANSREAD(x,y,z) { /* x = transactional view of object y: NULL, the cached copy from c_table, y itself if it is NEW, or transRead(y,z) */ \
+ void * inputvalue; \
+ if ((inputvalue=y)==NULL) x=NULL;\
+ else { \
+ chashlistnode_t * cnodetmp=&c_table[(((unsigned INTPTR)inputvalue)&c_mask)>>4]; /* same bucket computation as t_chashSearch */ \
+ do { \
+ if (cnodetmp->key==inputvalue) {x=cnodetmp->val; break;} \
+ cnodetmp=cnodetmp->next; \
+ if (cnodetmp==NULL) {if (((struct ___Object___*)inputvalue)->___objstatus___&NEW) {x=inputvalue; break;} else \
+ {x=transRead(inputvalue,z); asm volatile ("" : "=m" (c_table),"=m" (c_mask)); break;}} /* the empty asm marks c_table and c_mask as written so they are reloaded after transRead */ \
+ } while(1); \
+ }}
+
+/* =================================
+ * Data structures
+ * =================================
+ */
+typedef struct objstr { // header of one object store; the storage bytes follow this header (see OSUSED)
+ unsigned int size; //this many bytes are allocated after this header
+ void *top; // upper end of the used region; OSUSED is top minus the end of this header
+ struct objstr *next; // next object store in the chain
+} objstr_t;
+
+#define MAXOBJLIST 512 // capacity of one objlist / objlockstate block
+struct objlist { // block of object pointers, chained through next
+ int offset; // NOTE(review): presumably the number of used entries in objs - confirm against users
+ void * objs[MAXOBJLIST]; // object pointers held by this block
+ struct objlist * next; // next block in the chain
+};
+
+extern __thread struct objlist * newobjs;
+extern __thread objstr_t *t_cache;
+extern __thread objstr_t *t_reserve;
+#ifdef STMSTATS
+typedef struct objlockstate { // block of mutexes, chained through next
+ int offset; // NOTE(review): presumably the number of mutexes already handed out from lock[] - confirm
+ pthread_mutex_t lock[MAXOBJLIST]; // mutex storage; objheader_t.objlock is a pointer to a pthread_mutex_t
+ struct objlockstate *next; // next block in the chain
+} objlockstate_t;
+extern __thread threadrec_t *trec;
+extern __thread struct objlist * lockedobjs;
+extern objlockstate_t *objlockscope;
+pthread_mutex_t lockedobjstore; // NOTE(review): this is a definition, not an extern declaration, inside a header; every including file gets its own tentative definition - consider extern here plus one definition in a .c file
+#endif
+
+
+/***********************************
+ * Global Variables for statistics
+ **********************************/
+#ifdef TRANSSTATS
+extern int numTransCommit;
+extern int numTransAbort;
+extern int nSoftAbort;
+extern int nSoftAbortAbort;
+extern int nSoftAbortCommit;
+#endif
+
+#ifdef STMSTATS
+extern int typesCausingAbort[];
+#endif
+
+
+/* ================================
+ * Functions used
+ * ================================
+ */
+int stmStartup();
+void objstrReset();
+void objstrDelete(objstr_t *store);
+objstr_t *objstrCreate(unsigned int size);
+void transStart();
+objheader_t *transCreateObj(void * ptr, unsigned int size);
+unsigned int getNewOID(void);
+void *objstrAlloc(unsigned int size);
+__attribute__((pure)) void *transRead(void *, void *);
+#ifdef DELAYCOMP
+int transCommit(void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params);
+int traverseCache(void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params);
+int alttraverseCache(void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params);
+void transCommitProcess(void **, int, int, void (*commitmethod)(void *, void *, void *), void * primitives, void * locals, void * params);
+#else
+int transCommit();
+int traverseCache();
+int alttraverseCache();
+void transCommitProcess(void **, int);
+#endif
+int altalttraverseCache();
+void transAbortProcess(void **, int);
+void randomdelay(int);
+#if defined(STMSTATS)||defined(SOFTABORT)
+int getTotalAbortCount(int, int, void *, int, void*, int*);
+int getTotalAbortCount2(void *, int, void *, int *);
+int getReadAbortCount(int, int, void*, int*);
+#endif
+#ifdef STMSTATS
+objheader_t * needLock(objheader_t *, void *);
+#endif
+#endif
--- /dev/null
+#include "machinepile.h"
+#include "mlookup.h"
+#include "llookup.h"
+#include "plookup.h"
+#include "prelookup.h"
+#include "threadnotify.h"
+#include "queue.h"
+#include "addUdpEnhance.h"
+#include "addPrefetchEnhance.h"
+#include "gCollect.h"
+#include "dsmlock.h"
+#include "prefetch.h"
+#ifdef COMPILER
+#include "thread.h"
+#endif
+#ifdef ABORTREADERS
+#include "abortreaders.h"
+#endif
+#include "trans.h"
+
+#ifdef RECOVERY
+#include <unistd.h>
+#include <signal.h>
+#include <sys/select.h>
+#define WAIT_TIME 3
+#endif
+
+#define NUM_THREADS 1
+#define CONFIG_FILENAME "dstm.conf"
+
+/* Thread transaction variables */
+
+__thread objstr_t *t_cache;
+__thread struct ___Object___ *revertlist;
+#ifdef ABORTREADERS
+__thread int t_abort;
+__thread jmp_buf aborttrans;
+#endif
+
+/* Global Variables */
+extern int classsize[];
+pfcstats_t *evalPrefetch;
+extern int numprefetchsites; //Global variable containing number of prefetch sites
+extern pthread_mutex_t mainobjstore_mutex; // Mutex to lock main Object store
+pthread_mutex_t prefetchcache_mutex; // Mutex to lock Prefetch Cache
+pthread_mutexattr_t prefetchcache_mutex_attr; /* Attribute for lock to make it a recursive lock */
+extern prehashtable_t pflookup; //Global Prefetch cache's lookup table
+pthread_t wthreads[NUM_THREADS]; //Worker threads for working on the prefetch queue
+pthread_t tPrefetch; /* Primary Prefetch thread that processes the prefetch queue */
+extern objstr_t *mainobjstore;
+unsigned int myIpAddr;
+unsigned int *hostIpAddrs;
+int sizeOfHostArray;
+int numHostsInSystem;
+int myIndexInHostArray;
+unsigned int oidsPerBlock;
+unsigned int oidMin;
+unsigned int oidMax;
+
+sockPoolHashTable_t *transReadSockPool;
+sockPoolHashTable_t *transPrefetchSockPool;
+sockPoolHashTable_t *transRequestSockPool;
+pthread_mutex_t notifymutex;
+pthread_mutex_t atomicObjLock;
+
+/***********************************
+ * Global Variables for statistics
+ **********************************/
+int numTransCommit = 0;
+int numTransAbort = 0;
+int nchashSearch = 0;
+int nmhashSearch = 0;
+int nprehashSearch = 0;
+int nRemoteSend = 0;
+int nSoftAbort = 0;
+int bytesSent = 0;
+int bytesRecv = 0;
+int totalObjSize = 0;
+
+/***********************************
+ * Global variables for Duplication
+ ***********************************/
+int *liveHosts;
+int liveHostsValid;
+int numLiveHostsInSystem;
+int flipBit; // Used to distribute requests between primary and backup evenly
+unsigned int *locateObjHosts;
+__thread int timeoutFlag;
+extern int leaderFixing;
+extern pthread_mutex_t leaderFixing_mutex;
+extern pthread_mutex_t liveHosts_mutex;
+
+unsigned int liveTransactions[25];
+unsigned int transIDMax;
+unsigned int transIDMin;
+unsigned int transIDIndex;
+#ifdef DEBUG
+char ip[16];
+#endif
+
+/******************************
+ * Global variables for Paxos
+ ******************************/
+int n_a;
+unsigned int v_a;
+int n_h;
+int my_n;
+unsigned int leader;
+unsigned int origleader;
+unsigned int temp_v_a;
+int paxosRound;
+
+void printhex(unsigned char *, int);
+plistnode_t *createPiles();
+plistnode_t *sortPiles(plistnode_t *pileptr);
+
+/*******************************
+* Send and Recv function calls
+*******************************/
+/* Send exactly buflen bytes from buf on socket fd, looping over partial sends.
+ *
+ * errno is only examined after send() has actually failed (returned -1); it
+ * is not meaningful after a successful call. Only bytes that were really
+ * sent are added to bytesSent.
+ *
+ * With RECOVERY defined, any failure sets the thread-local timeoutFlag and
+ * returns early (errno is cleared for ECONNRESET and EAGAIN/EWOULDBLOCK).
+ * Without RECOVERY, a failure prints the error with perror and exits. */
+void send_data(int fd, void *buf, int buflen) {
+  char *buffer = (char *)(buf);
+  int size = buflen;
+  int numbytes;
+  while (size > 0) {
+    numbytes = send(fd, buffer, size, 0);
+    if (numbytes == -1) {
+#ifdef RECOVERY
+      if(errno == ECONNRESET) { // EINT/EPIPE??; Connection reset, possible disconnected machine
+#ifdef DEBUG
+        printf("%s-> errno = ECONNRESET; connection reset\n", __func__);
+        printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+        errno = 0;
+      }
+      else if(errno == EAGAIN || errno == EWOULDBLOCK) {
+#ifdef DEBUG
+        printf("%s-> errno = EAGAIN|EWOULDBLOCK; socket timeout\n", __func__);
+        printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+        errno = 0;
+      }
+      else {
+#ifdef DEBUG
+        printf("%s-> numbytes = -1; socket timeout\n", __func__);
+        printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+      }
+      timeoutFlag = 1;
+      return;
+#else
+      perror("send");
+      exit(0);
+#endif
+    }
+    bytesSent = bytesSent + numbytes;
+    buffer += numbytes;
+    size -= numbytes;
+  }
+}
+
+/* Receive exactly buflen bytes into buf from socket fd, looping over partial
+ * reads.
+ *
+ * errno is only examined after recv() has actually failed (returned -1).
+ * A return of 0 from recv() means the peer closed the connection; it is
+ * handled as a failure instead of being retried forever. Only bytes that
+ * were really received are added to bytesRecv.
+ *
+ * With RECOVERY defined, any failure sets the thread-local timeoutFlag and
+ * returns early (errno is cleared for ECONNRESET and EAGAIN/EWOULDBLOCK).
+ * Without RECOVERY, a failure prints a message and exits. */
+void recv_data(int fd, void *buf, int buflen) {
+  char *buffer = (char *)(buf);
+  int size = buflen;
+  int numbytes;
+  while (size > 0) {
+    numbytes = recv(fd, buffer, size, 0);
+    if (numbytes == 0) {
+      // Orderly shutdown by the peer: no more data will ever arrive
+#ifdef RECOVERY
+#ifdef DEBUG
+      printf("%s-> numbytes = 0; connection closed by peer\n", __func__);
+      printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+      timeoutFlag = 1;
+      return;
+#else
+      fprintf(stderr, "recv: connection closed by peer\n");
+      exit(0);
+#endif
+    }
+    if (numbytes == -1) {
+#ifdef RECOVERY
+      if(errno == ECONNRESET) {
+#ifdef DEBUG
+        printf("%s-> errno = ECONNRESET; connection reset\n", __func__);
+        printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+        errno = 0;
+      }
+      else if(errno == EAGAIN || errno == EWOULDBLOCK) {
+#ifdef DEBUG
+        printf("%s-> errno = EAGAIN|EWOULDBLOCK; socket timeout\n", __func__);
+        printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+        errno = 0;
+      }
+      else {
+#ifdef DEBUG
+        printf("%s-> numbytes = -1; socket timeout\n", __func__);
+        printf("***SETTING TIMEOUTFLAG***\n");
+#endif
+      }
+      timeoutFlag = 1;
+      return;
+#else
+      perror("recv");
+      exit(0);
+#endif
+    }
+    bytesRecv = bytesRecv + numbytes;
+    buffer += numbytes;
+    size -= numbytes;
+  }
+}
+
+/* Receive exactly buflen bytes into buf from socket fd, retrying across
+ * socket timeouts (EAGAIN/EWOULDBLOCK) and interrupted calls (EINTR).
+ *
+ * If the peer closes the connection (recv returns 0) or recv fails with any
+ * other error, the loop cannot make progress; the thread-local timeoutFlag is
+ * set and the function returns with the buffer only partially filled,
+ * instead of spinning forever. */
+void recv_data_block(int fd, void *buf, int buflen) {
+#ifdef DEBUG
+  printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
+#endif
+  char *buffer = (char *)(buf);
+  int size = buflen;
+  int numbytes;
+  while (size > 0) {
+    numbytes = recv(fd, buffer, size, 0);
+#ifdef DEBUG
+    printf("%s-> numbytes: %d\n", __func__, numbytes);
+#endif
+    if(numbytes > 0) {
+      bytesRecv = bytesRecv + numbytes;
+      buffer += numbytes;
+      size -= numbytes;
+      continue;
+    }
+    // errno is only meaningful when the call itself reported failure
+    if(numbytes == -1 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) {
+      errno = 0;
+      continue;
+    }
+    // Peer closed the connection or an unrecoverable error occurred
+    timeoutFlag = 1;
+    break;
+  }
+#ifdef DEBUG
+  printf("%s-> Exiting\n", __func__);
+#endif
+}
+
+/* Receive exactly buflen bytes into buf from socket fd and report the outcome.
+ * Returns 1 when all bytes arrived, 0 when recv returned 0 before that, and
+ * -1 (after calling perror) when recv failed. */
+int recv_data_errorcode(int fd, void *buf, int buflen) {
+#ifdef DEBUG
+  printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
+#endif
+  char *cursor = (char *)(buf);
+  int remaining;
+  for (remaining = buflen; remaining > 0; ) {
+    int got = recv(fd, cursor, remaining, 0);
+#ifdef DEBUG
+    printf("%s-> numbytes: %d\n", __func__, got);
+#endif
+    if (got == -1) {
+      perror("recv_data_errorcode");
+      return -1;
+    }
+    if (got == 0)
+      return 0;
+    cursor += got;
+    remaining -= got;
+  }
+#ifdef DEBUG
+  printf("%s-> Exiting\n", __func__);
+#endif
+  return 1;
+}
+
+/* Print numBytes bytes starting at ptr as two-digit lowercase hex values,
+ * each followed by a space, and finish the line with a newline. */
+void printhex(unsigned char *ptr, int numBytes) {
+  int i;
+  for (i = 0; i < numBytes; i++) {
+    // %02x zero-pads values below 16, which the old code did by hand
+    printf("%02x ", ptr[i]);
+  }
+  printf("\n");
+}
+
+/* Return the number of elements in array that precede the first -1.
+ * The array must contain a -1 terminator.
+ *
+ * Declared without "inline": under C99 and later rules a non-static inline
+ * definition provides no external definition, so calls that are not inlined
+ * would fail to link. */
+int arrayLength(int *array) {
+  int i;
+  for(i=0 ; array[i] != -1; i++)
+    ;
+  return i;
+}
+
+/* Return the largest of the first arraylength elements of array.
+ * array[0] is always read, so the array must hold at least one element.
+ *
+ * Declared without "inline": under C99 and later rules a non-static inline
+ * definition provides no external definition, so calls that are not inlined
+ * would fail to link. */
+int findmax(int *array, int arraylength) {
+  int max, i;
+  max = array[0];
+  // Element 0 is already the running maximum, so start comparing at 1
+  for(i = 1; i < arraylength; i++) {
+    if(array[i] > max) {
+      max = array[i];
+    }
+  }
+  return max;
+}
+
+/* Format machine id mid through midtoIP and return the resulting string.
+ *
+ * The result lives in a function-local static buffer, so it is overwritten by
+ * the next call and is shared between threads. The previous code wrote into
+ * the file-level buffer "ip", which this file defines only when DEBUG is set,
+ * so the function did not compile without DEBUG. The buffer keeps the same
+ * 16-byte size. */
+char* midtoIPString(unsigned int mid){
+  static char ipstring[16];
+  midtoIP(mid, ipstring);
+  return ipstring;
+}
+/* This function is a prefetch call generated by the compiler that
+ * populates the shared primary prefetch queue.
+ *
+ * siteid      - prefetch site index; its callcount in evalPrefetch is bumped
+ * ntuples     - number of entries in oids and endoffsets
+ * oids        - ntuples object ids
+ * endoffsets  - ntuples end offsets; the last one is the number of
+ *               arrayfields entries
+ * arrayfields - endoffsets[ntuples-1] shorts
+ *
+ * Queue node layout: siteid, ntuples, oids[], endoffsets[], arrayfields[].
+ * Nothing is queued when ntuples is not positive (there is no last end
+ * offset to read) or when getmemory returns NULL. */
+void prefetch(int siteid, int ntuples, unsigned int *oids, unsigned short *endoffsets, short *arrayfields) {
+  int top;
+  int qnodesize;
+  int len;
+  char *node;
+
+  if (ntuples <= 0)
+    return;
+
+  top = endoffsets[ntuples - 1];
+  /* Allocate for the queue node*/
+  qnodesize = 2*sizeof(int) + ntuples * (sizeof(unsigned short) + sizeof(unsigned int)) + top * sizeof(short);
+  node = getmemory(qnodesize);
+  if (node==NULL)
+    return;
+  /* Set queue node values */
+
+  /* TODO: Remove this after testing */
+  evalPrefetch[siteid].callcount++;
+
+  *((int *)(node))=siteid;
+  *((int *)(node + sizeof(int))) = ntuples;
+  len = 2*sizeof(int);
+  memcpy(node+len, oids, ntuples*sizeof(unsigned int));
+  memcpy(node+len+ntuples*sizeof(unsigned int), endoffsets, ntuples*sizeof(unsigned short));
+  memcpy(node+len+ntuples*(sizeof(unsigned int)+sizeof(short)), arrayfields, top*sizeof(short));
+
+  /* Lock and insert into primary prefetch queue */
+  movehead(qnodesize);
+}
+
+/* This function starts up the transaction runtime.
+ *
+ * option: the string "master" selects master mode; any other value,
+ *         including NULL, selects non-master mode.
+ *
+ * Returns 0 immediately if processConfigFile() fails.
+ * Master mode: runs dstmListen() on a detached thread and returns 1
+ *              (with RECOVERY, exits the process if not all hosts are live).
+ * Non-master:  runs dstmListen() on the calling thread and returns 0 once
+ *              that call returns. */
+int dstmStartup(const char * option) {
+ pthread_t thread_Listen, udp_thread_Listen;
+ pthread_attr_t attr;
+ int master=option!=NULL && strcmp(option, "master")==0;
+ int fd;
+ int udpfd;
+
+ if (processConfigFile() != 0)
+ return 0; //TODO: return error value, cause main program to exit
+#ifdef COMPILER
+ if (!master)
+ threadcount--; // non-master instances run with one thread fewer
+#endif
+
+#ifdef TRANSSTATS
+ printf("Trans stats is on\n");
+ fflush(stdout);
+#endif
+#ifdef ABORTREADERS
+ initreaderlist();
+#endif
+
+ //Initialize socket pool (one pool each for reads, prefetches and commit requests)
+ transReadSockPool = createSockPool(transReadSockPool, DEFAULTSOCKPOOLSIZE);
+ transPrefetchSockPool = createSockPool(transPrefetchSockPool, DEFAULTSOCKPOOLSIZE);
+ transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
+
+ dstmInit();
+ transInit();
+
+ fd=startlistening();
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); // listener threads are never joined
+#ifdef CACHE
+ udpfd = udpInit();
+ pthread_create(&udp_thread_Listen, &attr, udpListenBroadcast, (void*)udpfd); // descriptor travels by value inside the pointer argument
+#endif
+ if (master) {
+ pthread_create(&thread_Listen, &attr, dstmListen, (void*)fd); // descriptor travels by value inside the pointer argument
+#ifdef RECOVERY
+ updateLiveHosts();
+ setLocateObjHosts();
+ updateLiveHostsCommit();
+ leader = paxos();
+ if(!allHostsLive()) {
+ printf("Not all hosts live. Exiting.\n");
+ exit(-1);
+ }
+#endif
+ return 1;
+ } else {
+ dstmListen((void *)fd); // listen on the calling thread; the return below is reached only when dstmListen() returns
+ return 0;
+ }
+}
+
+//TODO Use this later
+/* Carve `size` bytes out of the first object store in the chain starting at
+ * `store` that still has room for them.
+ *
+ * Returns a pointer to the reserved bytes (the store's previous `top`), or
+ * NULL when `store` is NULL or no store in the chain has enough free space.
+ * Every store in the chain is examined, including the last one. */
+void *pCacheAlloc(objstr_t *store, unsigned int size) {
+  objstr_t *ptr;
+
+  for (ptr = store; ptr != NULL; ptr = ptr->next) {
+    /* bytes already used (top - start of payload) plus the request must
+     * fit inside this store's capacity */
+    if(((unsigned int)ptr->top - (unsigned int)ptr - sizeof(objstr_t) + size) <= ptr->size) {
+      void *tmp = ptr->top;
+      ptr->top += size;
+      return tmp;
+    }
+  }
+
+  return NULL;
+}
+
+/* This function initializes the transaction runtime's shared state and, when
+ * CACHE is defined, starts the prefetch thread.  A queue is shared between
+ * the main thread of execution and the prefetch thread: prefetch calls
+ * emitted by the compiler populate the queue with requests, and the
+ * prefetch thread processes them.
+ *
+ * Exits the process if the prefetch statistics cannot be allocated; returns
+ * early (without starting the prefetch thread) if the prefetch hash table
+ * cannot be created. */
+
+void transInit() {
+  //Create and initialize prefetch cache structure
+#ifdef CACHE
+  initializePCache();
+  if((evalPrefetch = initPrefetchStats()) == NULL) {
+    printf("%s() Error allocating memory at %s, %d\n", __func__, __FILE__, __LINE__);
+    exit(0);
+  }
+#endif
+
+  /* Initialize attributes for mutex: the prefetch cache mutex is recursive */
+  pthread_mutexattr_init(&prefetchcache_mutex_attr);
+  pthread_mutexattr_settype(&prefetchcache_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
+
+  pthread_mutex_init(&prefetchcache_mutex, &prefetchcache_mutex_attr);
+  pthread_mutex_init(&notifymutex, NULL);
+  pthread_mutex_init(&atomicObjLock, NULL);
+#ifdef CACHE
+  //Create prefetch cache lookup table
+  if(prehashCreate(PHASH_SIZE, PLOADFACTOR)) {
+    printf("ERROR\n");
+    return; //Failure
+  }
+
+  //Initialize primary shared queue
+  queueInit();
+  //Initialize machine pile w/prefetch oids and offsets shared queue
+  mcpileqInit();
+
+  //Create the primary prefetch thread; keep retrying until creation succeeds
+  int retval;
+#ifdef RANGEPREFETCH
+  do {
+    retval=pthread_create(&tPrefetch, NULL, transPrefetchNew, NULL);
+  } while(retval!=0);
+#else
+  do {
+    retval=pthread_create(&tPrefetch, NULL, transPrefetch, NULL);
+  } while(retval!=0);
+#endif
+  pthread_detach(tPrefetch);
+#endif
+}
+
+/* Cancel the threads the runtime spawned: with CACHE defined, the prefetch
+ * thread and each of the NUM_THREADS entries of wthreads.  Does nothing
+ * when CACHE is not defined. */
+void transExit() {
+#ifdef CACHE
+  int t = 0;
+
+  pthread_cancel(tPrefetch);
+  while (t < NUM_THREADS) {
+    pthread_cancel(wthreads[t]);
+    t++;
+  }
+#endif
+}
+
+/* Sleep for a short, time-derived delay; mostly used between transaction
+ * commit retries.  The delay is 1000 + (time(NULL) % 10000) nanoseconds,
+ * i.e. roughly 1 to 11 microseconds.  Because it is derived from the
+ * wall-clock second, calls within the same second use the same delay. */
+void randomdelay() {
+  struct timespec pause;
+  time_t now = time(NULL);
+
+  pause.tv_sec = 0;
+  pause.tv_nsec = (long)(1000 + (now % 10000));
+  nanosleep(&pause, NULL);
+}
+
+/* Set up the per-transaction state at the start of a transaction:
+ *  - a fresh object store (1 MiB) for the transaction cache,
+ *  - the transaction cache hash table,
+ *  - an empty revert list,
+ *  - with ABORTREADERS, a cleared abort flag. */
+void transStart()
+{
+  t_cache = objstrCreate(1048576);   /* 1048576 bytes == 1 MiB */
+  t_chashCreate(CHASH_SIZE, CLOADFACTOR);
+
+  revertlist = NULL;
+
+#ifdef ABORTREADERS
+  t_abort = 0;
+#endif
+}
+
+// Search for an address for a given oid
+/*#define INLINE inline __attribute__((always_inline))
+
+INLINE void * chashSearchI(chashtable_t *table, unsigned int key) {
+ //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE
+ chashlistnode_t *node = &table->table[(key & table->mask)>>1];
+
+ do {
+ if(node->key == key) {
+ return node->val;
+ }
+ node = node->next;
+ } while(node != NULL);
+
+ return NULL;
+ }*/
+
+
+
+
+/* This function looks up the object `oid` for the running transaction and
+ * returns a pointer to the transaction's copy of it.
+ *
+ * Lookup order:
+ *   1. the transaction cache hash table (c_table), searched inline;
+ *   2. the local machine table (mhashSearch) - the object is copied into
+ *      the transaction cache with STATUS cleared;
+ *   3. with CACHE, the prefetch cache (prehashSearch) - copied likewise;
+ *   4. the remote machine named by lhashSearch(), via getRemoteObj().
+ *
+ * Returns NULL when oid is 0, when no machine is known for the oid, or when
+ * the remote fetch yields nothing.  With COMPILER defined the returned
+ * pointer addresses the storage just after the object header; otherwise it
+ * addresses the header itself.
+ *
+ * With ABORTREADERS, a set t_abort flag makes the function release the
+ * transaction's resources and _longjmp to aborttrans instead of returning.
+ *
+ * NOTE(review): the function is marked pure although it inserts into the
+ * transaction cache and may perform network I/O - confirm that callers rely
+ * on the attribute only to merge repeated reads of the same oid. */
+__attribute__((pure)) objheader_t *transRead(unsigned int oid) {
+  unsigned int machinenumber;
+  objheader_t *tmp, *objheader;
+  objheader_t *objcopy;
+  int size;
+  chashlistnode_t *node;
+
+  if(oid == 0) {
+    return NULL;
+  }
+
+  /* Inline search of the transaction cache hash table */
+  node= &c_table[(oid & c_mask)>>1];
+  do {
+    if(node->key == oid) {
+#ifdef TRANSSTATS
+      nchashSearch++;
+#endif
+#ifdef COMPILER
+      return &((objheader_t*)node->val)[1];
+#else
+      return node->val;
+#endif
+    }
+    node = node->next;
+  } while(node != NULL);
+
+#ifdef ABORTREADERS
+  if (t_abort) {
+    //abort this transaction
+    //printf("ABORTING\n");
+    removetransactionhash();
+    objstrDelete(t_cache);
+    t_chashDelete();
+    _longjmp(aborttrans,1);
+  } else
+    addtransaction(oid);
+#endif
+
+  if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
+#ifdef TRANSSTATS
+    nmhashSearch++;
+#endif
+    /* Look up in machine lookup table and copy into cache*/
+    GETSIZE(size, objheader);
+    size += sizeof(objheader_t);
+    objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
+    memcpy(objcopy, objheader, size);
+    /* Insert into cache's lookup table */
+    STATUS(objcopy)=0;
+    t_chashInsert(OID(objheader), objcopy);
+#ifdef COMPILER
+    return &objcopy[1];
+#else
+    return objcopy;
+#endif
+  } else {
+#ifdef CACHE
+    if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
+#ifdef TRANSSTATS
+      nprehashSearch++;
+#endif
+      /* Look up in prefetch cache */
+      GETSIZE(size, tmp);
+      size+=sizeof(objheader_t);
+      objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
+      memcpy(objcopy, tmp, size);
+      /* Insert into cache's lookup table */
+      t_chashInsert(OID(tmp), objcopy);
+#ifdef COMPILER
+      return &objcopy[1];
+#else
+      return objcopy;
+#endif
+    }
+#endif
+    /* Get the object from the remote location */
+    if((machinenumber = lhashSearch(oid)) == 0) {
+      /* report the oid that could not be located (machinenumber is 0 here) */
+      printf("Error: %s() No machine found for oid = %x %s,%d\n",__func__, oid, __FILE__, __LINE__);
+      return NULL;
+    }
+    objcopy = getRemoteObj(machinenumber, oid);
+
+    if(objcopy == NULL) {
+      printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
+      return NULL;
+    } else {
+#ifdef TRANSSTATS
+      nRemoteSend++;
+#endif
+#ifdef COMPILER
+      return &objcopy[1];
+#else
+      return objcopy;
+#endif
+    }
+  }
+}
+
+
+/* Variant of transRead() that skips the transaction cache hash table and
+ * goes straight to the local machine table, the prefetch cache (with CACHE)
+ * and finally the remote machine.
+ *
+ * With RECOVERY the remote machine is taken from locateObjHosts[], using
+ * flipBit to alternate between the two entries stored per host index.
+ * Without RECOVERY it is the machine returned by lhashSearch(), and NULL is
+ * returned when that lookup yields 0.
+ *
+ * Returns NULL when the remote fetch yields nothing.  With COMPILER defined
+ * the returned pointer addresses the storage just after the object header;
+ * otherwise it addresses the header itself.
+ *
+ * With ABORTREADERS, a set t_abort flag makes the function release the
+ * transaction's resources and _longjmp to aborttrans instead of returning. */
+__attribute__((pure)) objheader_t *transRead2(unsigned int oid) {
+  unsigned int machinenumber;
+  objheader_t *tmp, *objheader;
+  objheader_t *objcopy;
+  int size;
+
+#ifdef DEBUG
+  printf("%s-> Start, oid:%u\n", __func__, oid);
+#endif
+
+#ifdef ABORTREADERS
+  if (t_abort) {
+    //abort this transaction
+    //printf("ABORTING\n");
+    removetransactionhash();
+    objstrDelete(t_cache);
+    t_chashDelete();
+    _longjmp(aborttrans,1);
+  } else
+    addtransaction(oid);
+#endif
+
+  if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
+#ifdef DEBUG
+    printf("%s-> Grab from this machine\n", __func__);
+#endif
+#ifdef TRANSSTATS
+    nmhashSearch++;
+#endif
+    /* Look up in machine lookup table and copy into cache*/
+    GETSIZE(size, objheader);
+    size += sizeof(objheader_t);
+    objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
+    memcpy(objcopy, objheader, size);
+    /* Insert into cache's lookup table */
+    STATUS(objcopy)=0;
+    t_chashInsert(OID(objheader), objcopy);
+#ifdef COMPILER
+    return &objcopy[1];
+#else
+    return objcopy;
+#endif
+  } else {
+#ifdef CACHE
+    if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
+#ifdef TRANSSTATS
+      nprehashSearch++;
+#endif
+      /* Look up in prefetch cache */
+      GETSIZE(size, tmp);
+      size+=sizeof(objheader_t);
+      objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
+      memcpy(objcopy, tmp, size);
+      /* Insert into cache's lookup table */
+      t_chashInsert(OID(tmp), objcopy);
+#ifdef COMPILER
+      return &objcopy[1];
+#else
+      return objcopy;
+#endif
+    }
+#endif
+    /* Get the object from the remote location */
+#ifdef DEBUG
+    printf("%s-> Grab from remote machine\n", __func__);
+#endif
+#ifdef RECOVERY
+    //while(!liveHostsValid) {
+    //}
+    /*if(!liveHostsValid){
+       sleep(WAIT_TIME);
+       }*/
+    unsigned int mindex = findHost(lhashSearch(oid));
+    /* two candidate hosts are stored per index; alternate between them */
+    machinenumber = locateObjHosts[2*mindex+flipBit];
+    flipBit ^= 1;
+    printf("mindex:%d, oid:%d, machinenumber:%s\n", mindex, oid, midtoIPString(machinenumber));
+#else
+    if((machinenumber = lhashSearch(oid)) == 0) {
+      /* report the oid that could not be located (machinenumber is 0 here) */
+      printf("Error: %s() No machine found for oid = %x %s,%d\n",__func__, oid, __FILE__, __LINE__);
+      return NULL;
+    }
+#endif
+    objcopy = getRemoteObj(machinenumber, oid);
+
+    if(objcopy == NULL) {
+      printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
+      return NULL;
+    } else {
+#ifdef TRANSSTATS
+      nRemoteSend++;
+#endif
+#ifdef COMPILER
+      return &objcopy[1];
+#else
+      return objcopy;
+#endif
+    }
+  }
+}
+
+/* Allocate a new object of `size` payload bytes (plus its header) in the
+ * transaction cache, give it a fresh oid, mark it NEW with version 1,
+ * rcount 1 and isBackup 0, and register it in the transaction cache hash
+ * table.  With COMPILER defined the returned pointer addresses the space
+ * just after the header; otherwise it addresses the header. */
+objheader_t *transCreateObj(unsigned int size) {
+  objheader_t *hdr = (objheader_t *) objstrAlloc(&t_cache, (sizeof(objheader_t) + size));
+
+  OID(hdr) = getNewOID();
+  hdr->version = 1;
+  hdr->rcount = 1;
+  hdr->isBackup = 0;
+  STATUS(hdr) = NEW;
+
+  t_chashInsert(OID(hdr), hdr);
+
+#ifdef COMPILER
+  return hdr + 1; //want space after object header
+#else
+  return hdr;
+#endif
+}
+
+
+#if 1
+/* This function creates machine piles based on all machines involved in a
+ * transaction commit request.
+ *
+ * It walks every bin of the transaction cache hash table (c_table, c_size
+ * bins) and every node chained in a bin, and inserts each object header
+ * into the pile list with pInsert().
+ *
+ * RECOVERY branch: each object is inserted twice, once for the machine
+ * returned by getPrimaryMachine() and once for getBackupMachine(); objects
+ * that were DIRTY or NEW have their status overwritten with DIRTY between
+ * the two inserts.
+ * Otherwise: NEW or locally-hosted objects go to myIpAddr, all others to the
+ * machine returned by lhashSearch().
+ *
+ * Returns the head of the pile list, or NULL if a machine lookup fails
+ * (piles built so far are not released on that path).
+ *
+ * NOTE(review): this branch is selected with `#if RECOVERY` whereas the rest
+ * of the file uses `#ifdef RECOVERY` - confirm RECOVERY is always defined
+ * with a value. */
+plistnode_t *createPiles() {
+ int i;
+ unsigned int oid;
+ plistnode_t *pile = NULL;
+ unsigned int machinenum;
+ unsigned int destMachine[2]; // not referenced in this function
+ objheader_t *headeraddr;
+ chashlistnode_t * ptr = c_table;
+ /* Represents number of bins in the chash table */
+ unsigned int size = c_size;
+
+ for(i = 0; i < size ; i++) {
+ chashlistnode_t * curr = &ptr[i];
+ /* Inner loop to traverse the linked list of the cache lookupTable */
+ while(curr != NULL) {
+ //a key of 0 ends the walk of this bin (e.g. the first bin node is empty)
+ if(curr->key == 0)
+ break;
+ headeraddr=(objheader_t *) curr->val;
+
+#if RECOVERY
+ oid = OID(headeraddr);
+#ifdef DEBUG
+ printf("%s-> oid:%u, version:%d, status:%d, type:%d\n", __func__, OID(headeraddr), headeraddr->version, STATUS(headeraddr), TYPE(headeraddr));
+
+ if (STATUS(headeraddr) & NEW) { // new/local object
+ printf("%s-> new/local object\n", __func__);
+ }
+ else if ((mhashSearch(curr->key) != NULL)) { //local/nonnew
+ if(STATUS(headeraddr) & DIRTY) { // modified
+ printf("%s-> old/local/mod object\n", __func__);
+ }
+ else { //read
+ printf("%s-> old/local/read object\n", __func__);
+ }
+ } else if ((machinenum = lhashSearch(curr->key)) != 0) { // remote/nonnew object
+ if(STATUS(headeraddr) & DIRTY) { //modified
+ printf("%s-> remote/local/mod object\n", __func__);
+ }
+ else { //read
+ printf("%s-> remote/local/read object\n", __func__);
+ }
+ } else {
+ printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
+ return NULL; // note: this early return exists only in DEBUG builds
+ }
+ unsigned int pmid = getPrimaryMachine(lhashSearch(oid));
+ unsigned int bmid = getBackupMachine(lhashSearch(oid));
+ printf("%s-> Primary Machine: [%s], ", __func__, midtoIPString(pmid));
+ printf("Backup Machine: [%s]\n", midtoIPString(bmid));
+#endif
+ int makedirty = 0; // remember whether the object was written or created in this transaction
+ if(STATUS(headeraddr) & DIRTY || STATUS(headeraddr) & NEW) {
+ makedirty = 1;
+ }
+ pile = pInsert(pile, headeraddr, getPrimaryMachine(lhashSearch(oid)), c_numelements); // pile for the primary machine
+//problem here (original author's note; the issue is not described)
+ if(makedirty) {
+ STATUS(headeraddr) = DIRTY; // overwrites the whole status, dropping NEW, before the backup insert - presumably so the backup treats it as a modification; TODO confirm
+ }
+ pile = pInsert(pile, headeraddr, getBackupMachine(lhashSearch(oid)), c_numelements); // pile for the backup machine
+#else
+ // Get machine location for object id (and whether local or not)
+ if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
+ machinenum = myIpAddr;
+ } else if ((machinenum = lhashSearch(curr->key)) == 0) {
+ printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
+ return NULL;
+ }
+
+ //Make machine groups
+ pile = pInsert(pile, headeraddr, machinenum, c_numelements);
+#endif
+ curr = curr->next;
+ }
+ }
+ return pile;
+}
+#else
+/* Alternative createPiles() for a flat (array-of-entries) transaction cache
+ * table; compiled only when the `#if 1` above is switched off.
+ *
+ * Every entry with a non-zero key is added to the pile of the machine that
+ * hosts it: myIpAddr for NEW or locally-hosted objects, otherwise the
+ * machine returned by lhashSearch().  Returns the pile list, or NULL when
+ * lhashSearch() finds no machine. */
+plistnode_t *createPiles() {
+  plistnode_t *pile = NULL;
+  struct chashentry *bins = c_table;
+  /* Represents number of bins in the chash table */
+  unsigned int nbins = c_size;
+  unsigned int bin;
+
+  for (bin = 0; bin < nbins; bin++) {
+    struct chashentry *entry = bins + bin;
+    objheader_t *headeraddr;
+    unsigned int machinenum;
+
+    /* a key of 0 marks an unused bin */
+    if (entry->key != 0) {
+      headeraddr = (objheader_t *) entry->ptr;
+
+      //Get machine location for object id (and whether local or not)
+      if ((STATUS(headeraddr) & NEW) || mhashSearch(entry->key) != NULL) {
+        machinenum = myIpAddr;
+      } else {
+        machinenum = lhashSearch(entry->key);
+        if (machinenum == 0) {
+          printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
+          return NULL;
+        }
+      }
+
+      //Make machine groups
+      pile = pInsert(pile, headeraddr, machinenum, c_numelements);
+    }
+  }
+  return pile;
+}
+#endif
+
+/* This function initiates the transaction commit process.
+ * It creates new piles by calling createPiles(), sends a TRANS_REQUEST
+ * (with the participant list, read set and modified objects) to each remote
+ * machine for objects found remotely, and calls handleLocalReq() to process
+ * objects found locally.  It then collects one control byte per remote
+ * participant, lets decideResponse() pick the final response, sends that
+ * response to every remote participant and runs doLocalProcess() for the
+ * local pile.  The whole round is repeated, after randomdelay(), for as long
+ * as decideResponse() asks for a retry (soft abort).
+ *
+ * Returns 0 when the transaction committed, TRANS_ABORT when it aborted,
+ * and 1 when it was abandoned early (abort flag already set under
+ * ABORTREADERS, socket/allocation/lookup failure, or decideResponse()
+ * returning 0).  Exits the process on any other final response.
+ *
+ * NOTE(review): the early `return 1` paths free listmid/tosend but do not
+ * call pDelete() on the piles - confirm whether that leak is intended. */
+int transCommit() {
+ unsigned int tot_bytes_mod, *listmid;
+ plistnode_t *pile, *pile_ptr;
+ int trecvcount;
+ char treplyretry; /* keeps track of the common response that needs to be sent */
+ int firsttime=1;
+ trans_commit_data_t transinfo; /* keeps track of objs locked during transaction */
+ char finalResponse;
+
+ int tmpTransIndex = (transIDIndex++)%25; // slot in liveTransactions; 25 is presumably that array's size - TODO confirm
+ liveTransactions[tmpTransIndex] = getNewTransID();
+
+#ifdef DEBUG
+ printf("%s-> Start, transID:%d\n", __func__, liveTransactions[tmpTransIndex]);
+#endif
+
+#ifdef ABORTREADERS
+ if (t_abort) {
+ //abort this transaction
+ /* Debug
+ * printf("ABORTING TRANSACTION AT COMMIT\n");
+ */
+ removetransactionhash();
+ objstrDelete(t_cache);
+ t_chashDelete();
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+#endif
+
+
+ do {
+ trecvcount = 0;
+ treplyretry = 0;
+
+ /* Look through all the objects in the transaction record and make piles
+ * for each machine involved in the transaction (first round only; retries
+ * reuse the piles saved in pile_ptr) */
+ if (firsttime) {
+ pile_ptr = pile = createPiles();
+ pile_ptr = pile = sortPiles(pile);
+ } else {
+ pile = pile_ptr;
+ }
+ firsttime = 0;
+ /* Create the packet to be sent in TRANS_REQUEST */
+
+ /* Count the number of participants */
+ int pilecount;
+ pilecount = pCount(pile);
+
+ /* Create a list of machine ids(Participants) involved in transaction */
+ listmid = calloc(pilecount, sizeof(unsigned int));
+ pListMid(pile, listmid);
+
+ /* Create a socket and getReplyCtrl array, initialize.
+ * socklist[i] stays 0 for the local pile and for piles not yet contacted. */
+ int socklist[pilecount];
+ int loopcount;
+ for(loopcount = 0 ; loopcount < pilecount; loopcount++)
+ socklist[loopcount] = 0;
+ char getReplyCtrl[pilecount];
+ for(loopcount = 0 ; loopcount < pilecount; loopcount++)
+ getReplyCtrl[loopcount] = 0;
+
+ /* Process each machine pile */
+ int sockindex = 0;
+ int localReqsock = -1; // index of the pile handled locally, -1 if none
+ trans_req_data_t *tosend;
+ tosend = calloc(pilecount, sizeof(trans_req_data_t));
+ while(pile != NULL) {
+#ifdef DEBUG
+ printf("%s-> New pile:[%s],", __func__, midtoIPString(pile->mid));
+ printf(" myIp:[%s]\n", midtoIPString(myIpAddr));
+#endif
+ tosend[sockindex].f.control = TRANS_REQUEST;
+ tosend[sockindex].f.mcount = pilecount;
+ tosend[sockindex].f.numread = pile->numread;
+ tosend[sockindex].f.nummod = pile->nummod;
+ tosend[sockindex].f.numcreated = pile->numcreated;
+#ifdef DEBUG
+ printf("%s-> numread:%d, nummod:%d, numcreated:%d\n", __func__, pile->numread, pile->nummod, pile->numcreated);
+#endif
+ tosend[sockindex].f.sum_bytes = pile->sum_bytes;
+ tosend[sockindex].listmid = listmid;
+ tosend[sockindex].objread = pile->objread;
+ tosend[sockindex].oidmod = pile->oidmod;
+ tosend[sockindex].oidcreated = pile->oidcreated;
+ int sd = 0;
+ if(pile->mid != myIpAddr) {
+#ifdef RECOVERY
+ if((sd = getSockWithLock(transRequestSockPool, pile->mid)) < 0) {
+#else
+ if((sd = getSock2WithLock(transRequestSockPool, pile->mid)) < 0) {
+#endif
+ printf("\ntransRequest(): socket create error\n");
+ free(listmid);
+ free(tosend);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+ socklist[sockindex] = sd;
+ /* Send bytes of data with TRANS_REQUEST control message */
+ send_data(sd, &(tosend[sockindex].f), sizeof(fixed_data_t));
+ /*if(timeoutFlag) {
+ printf("send_data: remote machine dead, line:%d\n", __LINE__);
+ timeoutFlag = 0;
+ exit(1);
+ }*/
+ /* Send list of machines involved in the transaction */
+ {
+ int size=sizeof(unsigned int)*(tosend[sockindex].f.mcount);
+ send_data(sd, tosend[sockindex].listmid, size);
+ }
+
+ /* Send oids and version number tuples for objects that are read */
+ {
+ int size=(sizeof(unsigned int)+sizeof(unsigned short))*(tosend[sockindex].f.numread);
+ send_data(sd, tosend[sockindex].objread, size);
+ }
+
+ /* Send objects that are modified: they are packed back to back
+ * (header + payload each) into one temporary buffer of sum_bytes */
+ void *modptr;
+ if((modptr = calloc(1, tosend[sockindex].f.sum_bytes)) == NULL) {
+ printf("Calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
+ free(listmid);
+ free(tosend);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+ int offset = 0;
+ int i;
+ for(i = 0; i < tosend[sockindex].f.nummod ; i++) {
+ int size;
+ objheader_t *headeraddr;
+ if((headeraddr = t_chashSearch(tosend[sockindex].oidmod[i])) == NULL) {
+ printf("%s() Error: No such oid %s, %d\n", __func__, __FILE__, __LINE__);
+ free(modptr);
+ free(listmid);
+ free(tosend);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+ GETSIZE(size,headeraddr);
+ size+=sizeof(objheader_t);
+ memcpy(modptr+offset, headeraddr, size);
+ offset+=size;
+ }
+ send_data(sd, modptr, tosend[sockindex].f.sum_bytes);
+ free(modptr);
+ } else { //handle request locally
+ localReqsock = sockindex;
+ handleLocalReq(&tosend[sockindex], &transinfo, &getReplyCtrl[sockindex]);
+ }
+ sockindex++;
+ pile = pile->next;
+ } //end of pile processing
+
+ /* Recv Ctrl msgs from all machines */
+#ifdef DEBUG
+ printf("%s-> Finished sending transaction read/mod objects\n",__func__);
+#endif
+ int i;
+ for(i = 0; i < pilecount; i++) {
+ printf("i:%d\n", i); // NOTE(review): printed unconditionally, not guarded by DEBUG
+ if(i == localReqsock)
+ continue;
+ int sd = socklist[i];
+ if(sd != 0) {
+ char control;
+ recv_data(sd, &control, sizeof(char));
+ /*if(timeoutFlag) {
+ printf("recv_data: remote machine dead, timeoutFlag:%d, timeoutFlag:%d, line:%d\n", timeoutFlag, timeoutFlag, __LINE__);
+ timeoutFlag = 0;
+ exit(1);
+ }*/
+ //Update common data structure with new ctrl msg
+ getReplyCtrl[i] = control;
+ /* Recv Objects if participant sends TRANS_DISAGREE */
+ //printf("getReplyCtrl[%d] = %d\n", i, (int)getReplyCtrl[i]);
+#ifdef CACHE
+ if(control == TRANS_DISAGREE) {
+ int length;
+ recv_data(sd, &length, sizeof(int));
+ void *newAddr;
+ pthread_mutex_lock(&prefetchcache_mutex);
+ if ((newAddr = prefetchobjstrAlloc((unsigned int)length)) == NULL) {
+ printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
+ free(tosend);
+ free(listmid);
+ pthread_mutex_unlock(&prefetchcache_mutex);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+ pthread_mutex_unlock(&prefetchcache_mutex);
+ recv_data(sd, newAddr, length);
+ int offset = 0;
+ while(length != 0) { // walk the received objects, each header + payload, until all bytes are consumed
+ unsigned int oidToPrefetch;
+ objheader_t * header;
+ header = (objheader_t *)(((char *)newAddr) + offset);
+ oidToPrefetch = OID(header);
+ STATUS(header)=0;
+ int size = 0;
+ GETSIZE(size, header);
+ size += sizeof(objheader_t);
+ //make an entry in prefetch hash table, replacing any older entry for this oid
+ void *oldptr;
+ if((oldptr = prehashSearch(oidToPrefetch)) != NULL) {
+ prehashRemove(oidToPrefetch);
+ prehashInsert(oidToPrefetch, header);
+ } else {
+ prehashInsert(oidToPrefetch, header);
+ }
+ length = length - size;
+ offset += size;
+ }
+ } //end of receiving objs
+#endif
+ }
+ }
+#ifdef DEBUG
+ printf("%s-> Decide final response now\n", __func__);
+#endif
+ /* Decide the final response */
+ if((finalResponse = decideResponse(getReplyCtrl, &treplyretry, pilecount)) == 0) {
+ printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__); // NOTE(review): message text mentions the prefetch cache although this is the decideResponse() failure path
+ free(tosend);
+ free(listmid);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+#ifdef DEBUG
+ printf("%s-> Final Response: %d\n", __func__, (int)finalResponse);
+#endif
+ /* Send responses to all machines */
+ for(i = 0; i < pilecount; i++) {
+ int sd = socklist[i];
+ if(sd != 0) { // remote participant
+#ifdef CACHE
+ if(finalResponse == TRANS_COMMIT) {
+ int retval;
+ /* Update prefetch cache */
+ if((retval = updatePrefetchCache(&(tosend[i]))) != 0) {
+ printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
+ free(tosend);
+ free(listmid);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+
+
+ /* Invalidate objects in other machine cache */
+ if(tosend[i].f.nummod > 0) {
+ if((retval = invalidateObj(&(tosend[i]))) != 0) {
+ printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
+ free(tosend);
+ free(listmid);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 1;
+ }
+ }
+#ifdef ABORTREADERS
+ removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
+ removethisreadtransaction(tosend[i].objread, tosend[i].f.numread);
+#endif
+ }
+#ifdef ABORTREADERS
+ else if (!treplyretry) {
+ removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
+ removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
+ }
+#endif
+#endif
+ send_data(sd, &finalResponse, sizeof(char));
+ } else {
+ /* Complete local processing (socklist[i] == 0 marks the local pile) */
+ doLocalProcess(finalResponse, &(tosend[i]), &transinfo);
+#ifdef ABORTREADERS
+ if(finalResponse == TRANS_COMMIT) {
+ removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
+ removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
+ } else if (!treplyretry) {
+ removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
+ removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
+ }
+#endif
+ }
+ }
+
+#ifdef RECOVERY
+#ifdef DEBUG
+ printf("%s-> Free sockets\n", __func__);
+#endif
+ for(i = 0; i < pilecount; i++) {
+ if(socklist[i] != 0) {
+ freeSockWithLock(transRequestSockPool, listmid[i], socklist[i]);
+ }
+ }
+#endif
+ /* Free resources; the piles are kept when a retry is pending */
+ free(tosend);
+ free(listmid);
+ if (!treplyretry)
+ pDelete(pile_ptr);
+ /* wait a random amount of time before retrying to commit transaction*/
+ if(treplyretry) {
+ randomdelay();
+#ifdef TRANSSTATS
+ nSoftAbort++;
+#endif
+ }
+ /* Retry trans commit procedure during soft_abort case */
+ } while (treplyretry);
+
+ if(finalResponse == TRANS_ABORT) {
+ //printf("Aborting trans\n");
+#ifdef TRANSSTATS
+ numTransAbort++;
+#endif
+ /* Free Resources */
+ objstrDelete(t_cache);
+ t_chashDelete();
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return TRANS_ABORT;
+ } else if(finalResponse == TRANS_COMMIT) {
+#ifdef TRANSSTATS
+ numTransCommit++;
+#endif
+ /* Free Resources */
+ objstrDelete(t_cache);
+ t_chashDelete();
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 0;
+ } else {
+ //TODO Add other cases
+ printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ exit(-1);
+ }
+#ifdef DEBUG
+ printf("%s-> End, line:%d\n\n", __func__, __LINE__);
+#endif
+#ifdef RECOVERY
+ liveTransactions[tmpTransIndex] = 0;
+#endif
+ return 0; // not reached: every branch above returns or exits
+}
+
+/* This function handles the local objects involved in a transaction
+ * committing process. It also decides whether this local machine answers
+ * AGREE or SOFT_ABORT to the coordinator (the per-object helpers
+ * commitCountForObjRead()/commitCountForObjMod() also receive getReplyCtrl).
+ *
+ * tdata:        the request for the local pile (read set and modified oids)
+ * transinfo:    filled with the arrays of locked / not-found oids and their
+ *               counts; ownership of both arrays passes to the caller, which
+ *               releases them through doLocalProcess()
+ * getReplyCtrl: receives the local participant's control reply
+ *
+ * *transinfo is filled on every path, including failures, so the caller
+ * never frees indeterminate pointers.  When a modified oid cannot be found
+ * in the transaction cache, the objects processed so far are still recorded
+ * in *transinfo and *getReplyCtrl is left as the helpers set it. */
+void handleLocalReq(trans_req_data_t *tdata, trans_commit_data_t *transinfo, char *getReplyCtrl) {
+  unsigned int *oidnotfound = NULL, *oidlocked = NULL;
+  int numoidnotfound = 0, numoidlocked = 0;
+  int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
+  int numread = tdata->f.numread;
+  int total = tdata->f.numread + tdata->f.nummod;
+  int lookupfailed = 0;
+  int i;
+  unsigned int oid;
+  unsigned short version;
+
+  /* Counters and arrays to formulate decision on control message to be sent */
+  oidnotfound = (unsigned int *) calloc(total, sizeof(unsigned int));
+  /* one extra slot holds the divider between read and write locks */
+  oidlocked = (unsigned int *) calloc(total + 1, sizeof(unsigned int));
+  if (oidlocked == NULL || (total > 0 && oidnotfound == NULL)) {
+    printf("Error: %s() calloc error %s, %d\n", __func__, __FILE__, __LINE__);
+    free(oidnotfound);
+    free(oidlocked);
+    transinfo->objlocked = NULL;
+    transinfo->objnotfound = NULL;
+    transinfo->modptr = NULL;
+    transinfo->numlocked = 0;
+    transinfo->numnotfound = 0;
+    return;
+  }
+
+  /* Process each oid in the machine pile/ group per thread */
+  for (i = 0; i < total; i++) {
+    if (i < numread) {
+      /* objread is a packed array of (oid, version) tuples */
+      int incr = sizeof(unsigned int) + sizeof(unsigned short);
+      incr *= i;
+      oid = *((unsigned int *)(((char *)tdata->objread) + incr));
+      version = *((unsigned short *)(((char *)tdata->objread) + incr + sizeof(unsigned int)));
+      commitCountForObjRead(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
+    } else { // Objects Modified
+      if(i == numread) {
+        /* divider between the read locks and the write locks */
+        oidlocked[numoidlocked++] = -1;
+      }
+      objheader_t *headptr;
+      headptr = (objheader_t *) t_chashSearch(tdata->oidmod[i-numread]);
+      if (headptr == NULL) {
+        printf("Error: handleLocalReq() returning NULL, no such oid %s, %d\n", __FILE__, __LINE__);
+        lookupfailed = 1;
+        break;
+      }
+      oid = OID(headptr);
+      version = headptr->version;
+      commitCountForObjMod(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
+    }
+  }
+
+  /* Fill out the trans_commit_data_t data structure. This is required for a trans commit process
+   * if Participant receives a TRANS_COMMIT */
+  transinfo->objlocked = oidlocked;
+  transinfo->objnotfound = oidnotfound;
+  transinfo->modptr = NULL;
+  transinfo->numlocked = numoidlocked;
+  transinfo->numnotfound = numoidnotfound;
+
+  if (lookupfailed) {
+    return;
+  }
+
+  /* Condition to send TRANS_AGREE */
+  if(v_matchnolock == total) {
+    *getReplyCtrl = TRANS_AGREE;
+  }
+  /* Condition to send TRANS_SOFT_ABORT */
+  if((v_matchlock > 0 && v_nomatch == 0) || (numoidnotfound > 0 && v_nomatch == 0)) {
+    *getReplyCtrl = TRANS_SOFT_ABORT;
+  }
+}
+
+/* Apply the coordinator's final decision to the objects hosted locally and
+ * release the bookkeeping arrays produced by handleLocalReq().
+ *
+ * finalResponse: TRANS_ABORT or TRANS_COMMIT; anything else is reported
+ * tdata:         the local pile's request data
+ * transinfo:     locked / not-found oid arrays; both arrays are freed here
+ *                on every path (including failures) and the pointers are
+ *                reset to NULL */
+void doLocalProcess(char finalResponse, trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
+  if(finalResponse == TRANS_ABORT) {
+    if(transAbortProcess(transinfo) != 0) {
+      printf("Error in transAbortProcess() %s,%d\n", __FILE__, __LINE__);
+      fflush(stdout);
+    }
+  } else if(finalResponse == TRANS_COMMIT) {
+    int proceed = 1;
+#ifdef CACHE
+    /* Invalidate objects in other machine cache */
+    if(tdata->f.nummod > 0) {
+      int retval;
+      if((retval = invalidateObj(tdata)) != 0) {
+        printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
+        proceed = 0;   /* as before, the commit step is skipped on this error */
+      }
+    }
+#endif
+    if(proceed && transComProcess(tdata, transinfo) != 0) {
+      printf("Error in transComProcess() %s,%d\n", __FILE__, __LINE__);
+      fflush(stdout);
+    }
+  } else {
+    printf("ERROR...No Decision\n");
+  }
+
+  /* Free memory; free(NULL) is a no-op, so no guards are needed */
+  free(transinfo->objlocked);
+  transinfo->objlocked = NULL;
+  free(transinfo->objnotfound);
+  transinfo->objnotfound = NULL;
+}
+
+/* This function decides the response that needs to be sent to all
+ * Participant machines after the TRANS_REQUEST protocol.
+ *
+ * getReplyCtrl: one control byte per participant (pilecount entries)
+ * treplyretry:  set to 1 when the commit should be retried, else 0
+ *
+ * Decision:
+ *   any TRANS_DISAGREE (or unknown byte)  -> TRANS_ABORT,  no retry
+ *   every participant sent TRANS_AGREE    -> TRANS_COMMIT, no retry
+ *   otherwise (soft aborts present)       -> TRANS_ABORT,  retry
+ *
+ * NOTE: the previous form carried a cleanPCache() call and a trailing
+ * `return 0` placed after return statements; neither could execute, so
+ * they are not reproduced here. */
+char decideResponse(char *getReplyCtrl, char *treplyretry, int pilecount) {
+  int agreed = 0;
+  int disagreed = 0;
+  int idx;
+
+  for (idx = 0; idx < pilecount; idx++) {
+    char control = getReplyCtrl[idx];
+
+    if (control == TRANS_AGREE) {
+      agreed++;
+#ifdef DEBUG
+      printf("%s-> Participant sent TRANS_AGREE, i:%d, Control: %d\n", __func__, idx, (int)control);
+#endif
+    } else if (control == TRANS_SOFT_ABORT) {
+      /* soft aborts need no counter: they are whatever is neither an
+       * agreement nor a disagreement */
+#ifdef DEBUG
+      printf("%s-> Participant sent TRANS_SOFT_ABORT, i:%d, Control: %d\n", __func__, idx, (int)control);
+#endif
+    } else {
+      /* TRANS_DISAGREE, or an unknown message which is treated as one */
+#ifdef DEBUG
+      if (control != TRANS_DISAGREE)
+        printf("%s-> Participant sent unknown message, i:%d, Control: %d\n", __func__, idx, (int)control);
+#endif
+      disagreed++;
+#ifdef DEBUG
+      printf("%s-> Participant sent TRANS_DISAGREE, i:%d, Control: %d\n", __func__, idx, (int)control);
+#endif
+    }
+  }
+
+  if (disagreed > 0) {
+    /* Send Abort */
+    *treplyretry = 0;
+    return TRANS_ABORT;
+  }
+  if (agreed == pilecount) {
+    /* Send Commit */
+    *treplyretry = 0;
+    return TRANS_COMMIT;
+  }
+  /* Send Abort in soft abort case followed by retry committing transaction again */
+  *treplyretry = 1;
+  return TRANS_ABORT;
+}
+
+/* This function obtains a connection from the read socket pool, places an
+ * object read request to the remote machine, reads the control message and
+ * the object if available, and copies the object and its header to the
+ * local transaction cache.
+ *
+ * mnum: machine id of the remote host
+ * oid:  id of the object to fetch
+ *
+ * Returns a pointer to the object header of the cached copy, or NULL when
+ * the remote machine reports OBJECT_NOT_FOUND (or sends any other control
+ * byte).
+ *
+ * With RECOVERY: when detectMachineFailure() reports a timeout, the
+ * duplication state is restored and the read is retried via transRead2();
+ * that result (or NULL) is returned, again as a pointer to the header. */
+
+void *getRemoteObj(unsigned int mnum, unsigned int oid) {
+  int size = 0;
+  char control = 0;
+  void *objcopy = NULL;
+
+  int sd = getSock2(transReadSockPool, mnum);
+  /* request = one control byte followed by the oid */
+  char readrequest[sizeof(char)+sizeof(unsigned int)];
+  readrequest[0] = READ_REQUEST;
+  /* the oid sits at an odd offset, so copy it bytewise rather than storing
+   * through a misaligned unsigned int pointer */
+  memcpy(&readrequest[1], &oid, sizeof(unsigned int));
+  send_data(sd, readrequest, sizeof(readrequest));
+
+  /* Read response from the Participant */
+  recv_data(sd, &control, sizeof(char));
+
+  if (control==OBJECT_NOT_FOUND) {
+    objcopy = NULL;
+  } else if(control==OBJECT_FOUND) {
+    /* Read object if found into local cache */
+    recv_data(sd, &size, sizeof(int));
+    objcopy = objstrAlloc(&t_cache, size);
+    recv_data(sd, objcopy, size);
+    STATUS(objcopy)=0;
+    /* Insert into cache's lookup table */
+    t_chashInsert(oid, objcopy);
+#ifdef TRANSSTATS
+    totalObjSize += size;
+#endif
+  }
+
+#ifdef RECOVERY
+  if( detectMachineFailure(mnum) ) { //check for timeouts
+    printf("looking for oid:%d\n", oid);
+    restoreDuplicationState(mnum); // suspect machine failure, restore state
+
+    objheader_t *temp;
+    temp = transRead2(oid); // retry transRead
+#ifdef COMPILER
+    /* transRead2 returns the address just past the header; step back to the
+     * header, but leave a NULL result untouched so callers still see NULL */
+    if (temp != NULL)
+      temp -= 1;
+#endif
+    return (void *)temp;
+  }
+#endif
+  return objcopy;
+}
+
+int detectMachineFailure(unsigned int mid) {
+  /* Reports whether the global timeoutFlag is raised. When it is, the
+   * flag is cleared again and 1 is returned; otherwise 0. mid is only
+   * used for the DEBUG trace. */
+  if(timeoutFlag != 1)
+    return 0;
+
+#ifdef DEBUG
+  printf("%s-> Suspect machine failure: [%s]\n", __func__, midtoIPString(mid));
+#endif
+  timeoutFlag = 0;
+  return 1;
+}
+
+void restoreDuplicationState(unsigned int deadHost) {
+  /* Reacts to a suspected failure of deadHost.
+   *  - If deadHost is already marked dead, just waits WAIT_TIME.
+   *  - If deadHost was the leader, runs paxos() first.
+   *  - On the leader: refreshes the live-host list, duplicates the lost
+   *    objects when the host is confirmed dead and publishes the new host
+   *    state. leaderFixing (guarded by leaderFixing_mutex) keeps a second
+   *    thread from starting the same repair; that thread waits WAIT_TIME.
+   *  - On any other machine: sends REMOTE_RESTORE_DUPLICATED_STATE plus
+   *    deadHost to the leader, reads one reply byte and waits WAIT_TIME. */
+  int sd;
+  char ctrl;
+  int deadIndex = findHost(deadHost);
+
+  /* findHost() returns -1 for an address that is not in hostIpAddrs;
+   * indexing liveHosts with it would read out of bounds */
+  if(deadIndex < 0) {
+    printf("restoreDuplicationState(): unknown host %u\n", deadHost);
+    return;
+  }
+
+  if(!liveHosts[deadIndex]) {
+    sleep(WAIT_TIME);
+    return;
+  }
+  if(deadHost == leader)
+    paxos();
+
+#ifdef DEBUG
+  printf("%s-> leader?:%s, me?:%d\n", __func__, midtoIPString(leader), (myIpAddr == leader));
+#endif
+
+  if(leader != myIpAddr) {
+    /* not the leader: delegate the repair */
+    if((sd = getSock2WithLock(transRequestSockPool, leader)) < 0) {
+      printf("restoreDuplicationState(): socket create error\n");
+      exit(-1);
+    }
+    ctrl = REMOTE_RESTORE_DUPLICATED_STATE;
+    send_data(sd, &ctrl, sizeof(char));
+    send_data(sd, &deadHost, sizeof(unsigned int));
+    recv_data(sd, &ctrl, sizeof(char));
+    sleep(WAIT_TIME);
+    return;
+  }
+
+  pthread_mutex_lock(&leaderFixing_mutex);
+  if(leaderFixing) {
+    /* another thread is already repairing */
+    pthread_mutex_unlock(&leaderFixing_mutex);
+    sleep(WAIT_TIME);
+    //while(leaderFixing);
+    return;
+  }
+  leaderFixing = 1;
+  pthread_mutex_unlock(&leaderFixing_mutex);
+
+  //fixit
+  updateLiveHosts();
+
+  if(!liveHosts[deadIndex]) {  //confirmed dead
+    duplicateLostObjects(deadHost);
+  }
+  if(updateLiveHostsCommit() != 0) {
+    printf("error updateLiveHostsCommit()\n");
+    exit(1);
+  }
+  pthread_mutex_lock(&leaderFixing_mutex);
+  leaderFixing = 0;
+  pthread_mutex_unlock(&leaderFixing_mutex);
+}
+
+/* Commit bookkeeping for one modified object.
+ * Looks oid up in the main object store. A missing object is appended to
+ * oidnotfound. Otherwise a write lock is attempted; an acquired lock is
+ * recorded in oidlocked. A version mismatch bumps *v_nomatch and sets
+ * *getReplyCtrl to TRANS_DISAGREE; a match bumps *v_matchnolock when the
+ * lock was acquired and *v_matchlock when it was not. */
+void commitCountForObjMod(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
+                          int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
+  void *mobj = mhashSearch(oid);
+  int gotlock, sameversion;
+
+  if (mobj == NULL) {
+    /* Object is no longer on this machine: remember it for later use */
+    oidnotfound[*numoidnotfound] = oid;
+    (*numoidnotfound)++;
+    return;
+  }
+
+  /* Lock attempt first, version check second (same order as before) */
+  gotlock = write_trylock(STATUSPTR(mobj)) ? 1 : 0;
+  sameversion = (version == ((objheader_t *)mobj)->version);
+
+  if (gotlock) {
+    //Keep track of what is locked
+    oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
+  }
+
+  if (!sameversion) {
+    /* Versions differ: hard abort, tell the coordinator */
+    (*v_nomatch)++;
+    *getReplyCtrl = TRANS_DISAGREE;
+    return;
+  }
+
+  if (gotlock) {
+    (*v_matchnolock)++;
+  } else {
+    /* The lock is held somewhere else */
+    (*v_matchlock)++;
+  }
+}
+
+/* Commit bookkeeping for one object that was only read.
+ * Looks oid up in the main object store. A missing object is appended to
+ * oidnotfound. Otherwise a read lock is attempted; an acquired lock is
+ * recorded in oidlocked. A version mismatch bumps *v_nomatch and sets
+ * *getReplyCtrl to TRANS_DISAGREE; a match bumps *v_matchnolock when the
+ * lock was acquired and *v_matchlock when it was not. */
+void commitCountForObjRead(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
+                           int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
+  void *mobj = mhashSearch(oid);
+  int gotlock, sameversion;
+
+  if (mobj == NULL) {
+    /* Object is no longer on this machine: remember it for later use */
+    oidnotfound[*numoidnotfound] = oid;
+    (*numoidnotfound)++;
+    return;
+  }
+
+  /* Lock attempt first, version check second (same order as before) */
+  gotlock = read_trylock(STATUSPTR(mobj)) ? 1 : 0;
+  sameversion = (version == ((objheader_t *)mobj)->version);
+
+  if (gotlock) {
+    //Keep track of what is locked
+    oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
+  }
+
+  if (!sameversion) {
+    /* Versions differ: hard abort, tell the coordinator */
+    (*v_nomatch)++;
+    *getReplyCtrl = TRANS_DISAGREE;
+    return;
+  }
+
+  if (gotlock) {
+    (*v_matchnolock)++;
+  } else {
+    /* Max number of readers reached, or another transaction holds a lock */
+    (*v_matchlock)++;
+  }
+}
+
+/* Finishes an aborting transaction by releasing every lock listed in
+ * transinfo->objlocked. An entry equal to -1 acts as a separator: oids
+ * before it are released with read_unlock, oids after it with
+ * write_unlock. Returns 0 on success, 1 as soon as a listed oid is not
+ * found in the main object store. */
+int transAbortProcess(trans_commit_data_t *transinfo) {
+  unsigned int *locked = transinfo->objlocked;
+  int total = transinfo->numlocked;
+  int writephase = 0;
+  int k;
+
+  for (k = 0; k < total; k++) {
+    void *hdr;
+
+    if(locked[k] == -1) {
+      writephase = 1;
+      continue;
+    }
+
+    hdr = mhashSearch(locked[k]);
+    if(hdr == NULL) {
+      printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
+      return 1;
+    }
+
+    if(writephase) {
+      write_unlock(STATUSPTR(hdr));
+    } else {
+      read_unlock(STATUSPTR(hdr));
+    }
+  }
+
+  return 0;
+}
+
+/* This function completes the COMMIT process if the transaction is commiting.
+ *   tdata     - supplies the modified oids (oidmod / f.nummod) and the
+ *               newly created oids (oidcreated / f.numcreated)
+ *   transinfo - supplies the list of locked oids (objlocked / numlocked)
+ * Returns 0 on success and 1 on the first lookup or allocation failure.
+ * An early return leaves the locks that were not yet processed in place. */
+int transComProcess(trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
+ objheader_t *header, *tcptr;
+ int i, nummod, tmpsize, numcreated, numlocked;
+ unsigned int *oidmod, *oidcreated, *oidlocked;
+ void *ptrcreate;
+#ifdef DEBUG
+ printf("%s-> Entering transComProcess, trans.c\n", __func__);
+#endif
+
+ nummod = tdata->f.nummod;
+ oidmod = tdata->oidmod;
+ numcreated = tdata->f.numcreated;
+ oidcreated = tdata->oidcreated;
+ numlocked = transinfo->numlocked;
+ oidlocked = transinfo->objlocked;
+
+
+#ifdef DEBUG
+ printf("%s-> nummod: %d, numcreated: %d, numlocked: %d\n", __func__, nummod, numcreated, numlocked);
+#endif
+
+ /* Phase 1: write every modified object back into the main object store */
+ for (i = 0; i < nummod; i++) {
+ if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
+ printf("Error: transComProcess() mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+ /* Copy from transaction cache -> main object store */
+ if ((tcptr = ((objheader_t *) t_chashSearch(oidmod[i]))) == NULL) {
+ printf("Error: transComProcess() chashSearch returned NULL at %s, %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+ GETSIZE(tmpsize, header);
+ char *tmptcptr = (char *) tcptr;
+ {
+ /* dst/src point just past the object headers */
+ struct ___Object___ *dst=(struct ___Object___*)((char*)header+sizeof(objheader_t));
+ struct ___Object___ *src=(struct ___Object___*)((char*)tmptcptr+sizeof(objheader_t));
+ /* Only these two fields of the leading struct ___Object___ are copied ... */
+ dst->___cachedCode___=src->___cachedCode___;
+ dst->___cachedHash___=src->___cachedHash___;
+
+ /* ... followed by everything after that leading struct */
+ memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___));
+ }
+
+ header->version += 1;
+ //printf("oid: %u, new header version: %d\n", oidmod[i], header->version);
+ /* Wake up threads registered on this object's notify list */
+ if(header->notifylist != NULL) {
+ notifyAll(&header->notifylist, OID(header), header->version);
+ }
+ }
+ /* If object is newly created inside transaction then commit it */
+ for (i = 0; i < numcreated; i++) {
+ if ((header = ((objheader_t *) t_chashSearch(oidcreated[i]))) == NULL) {
+ printf("Error: transComProcess() chashSearch returned NULL for oid = %x at %s, %d\n", oidcreated[i], __FILE__, __LINE__);
+ return 1;
+ }
+ /* The version is bumped on the cached copy before it is copied out */
+ header->version += 1;
+ //printf("oid: %u, new header version: %d\n", oidcreated[i], header->version);
+ GETSIZE(tmpsize, header);
+ tmpsize += sizeof(objheader_t);
+ /* Allocation from mainobjstore is serialized by mainobjstore_mutex */
+ pthread_mutex_lock(&mainobjstore_mutex);
+ if ((ptrcreate = objstrAlloc(&mainobjstore, tmpsize)) == NULL) {
+ printf("Error: transComProcess() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
+ pthread_mutex_unlock(&mainobjstore_mutex);
+ return 1;
+ }
+ pthread_mutex_unlock(&mainobjstore_mutex);
+ /* Initialize read and write locks */
+ initdsmlocks(STATUSPTR(header));
+ memcpy(ptrcreate, header, tmpsize);
+ /* Register the new object in the machine table and record this
+  * machine's address for it in the location table */
+ mhashInsert(oidcreated[i], ptrcreate);
+ lhashInsert(oidcreated[i], myIpAddr);
+ }
+ /* Unlock locked objects */
+ /* A -1 entry separates the list: oids before it are read-unlocked,
+  * oids after it are write-unlocked */
+ int useWriteUnlock = 0;
+ for(i = 0; i < numlocked; i++) {
+ if(oidlocked[i] == -1) {
+ useWriteUnlock = 1;
+ continue;
+ }
+ if((header = (objheader_t *) mhashSearch(oidlocked[i])) == NULL) {
+ printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
+ return 1;
+ }
+ if(!useWriteUnlock) {
+ read_unlock(STATUSPTR(header));
+ } else {
+ write_unlock(STATUSPTR(header));
+ }
+ }
+ return 0;
+}
+
+prefetchpile_t *foundLocal(char *ptr) {
+  /* Walks every prefetch tuple encoded in ptr and follows its field/array
+   * offsets through lookupObject() for as long as the objects are
+   * available locally. Tuples that resolve completely (or end in a null
+   * reference) are dropped; for the others the remaining offsets are
+   * handed to insertPile() under the machine returned by lhashSearch().
+   * Returns the head of the resulting pile list (NULL when nothing is
+   * left to request). */
+  int siteid = *(GET_SITEID(ptr));
+  int ntuples = *(GET_NTUPLES(ptr));
+  unsigned int *oidarray = GET_PTR_OID(ptr);
+  unsigned short *endoffsets = GET_PTR_EOFF(ptr, ntuples);
+  short *arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
+  prefetchpile_t *head = NULL;
+  int numLocal = 0;
+  int t;
+
+  for(t = 0; t < ntuples; t++) {
+    unsigned short first = (t == 0) ? 0 : endoffsets[t-1];
+    unsigned short last = endoffsets[t];
+    unsigned int oid = oidarray[t];
+    int pos;
+    int hitnull = 0;
+    int machinenum;
+
+    if (oid == 0)
+      continue;
+
+    //Look up fields locally
+    for(pos = first; pos < last; pos++) {
+      if (!lookupObject(&oid, arryfields[pos]))
+        break;
+      if (oid == 0) {
+        //Ended in a null pointer...
+        hitnull = 1;
+        break;
+      }
+    }
+    if (hitnull)
+      continue;
+
+    //Entire prefetch is local
+    if (pos == last && checkoid(oid)) {
+      numLocal++;
+      continue;
+    }
+
+    //Add to remote requests
+    machinenum = lhashSearch(oid);
+    insertPile(machinenum, oid, last-pos, &arryfields[pos], &head);
+  }
+
+  /* handle dynamic prefetching */
+  handleDynPrefetching(numLocal, ntuples, siteid);
+  return head;
+}
+
+int checkoid(unsigned int oid) {
+  /* Returns 1 when oid is present in the machine table or, failing that,
+   * in the prefetch table; 0 otherwise. The prefetch table is consulted
+   * only when the first lookup misses. */
+  return (mhashSearch(oid) != NULL) || (prehashSearch(oid) != NULL);
+}
+
+int lookupObject(unsigned int * oid, short offset) {
+  /* Follows one reference out of the object *oid, if that object is
+   * available in the machine table or the prefetch table.
+   * For types >= NUMCLASSES (handled as arrays) offset is an element
+   * index; otherwise it is a byte offset behind the object header.
+   * On success *oid is replaced by the referenced oid (0 for an
+   * out-of-range array index) and 1 is returned. Returns 0 when the
+   * object is in neither table. */
+  objheader_t *hdr = mhashSearch(*oid);
+  char *payload;
+
+  if (hdr == NULL)
+    hdr = prehashSearch(*oid);
+  if (hdr == NULL)
+    return 0;
+
+  payload = ((char *)hdr) + sizeof(objheader_t);
+
+  if(TYPE(hdr) < NUMCLASSES) {
+    (*oid) = *((unsigned int *)(payload + offset));
+    return 1;
+  }
+
+  {
+    struct ArrayObject *ao = (struct ArrayObject *) payload;
+    int elementsize = classsize[TYPE(hdr)];
+    int length = ao->___length___;
+
+    /* Out-of-bounds index: treat the object as found, reference is null */
+    if(offset < 0 || offset >= length) {
+      (*oid) = 0;
+      return 1;
+    }
+    (*oid) = *((unsigned int *)(((char *)ao) + sizeof(struct ArrayObject) + (elementsize*offset)));
+  }
+  return 1;
+}
+
+
+/* Body of the prefetch thread. Never returns: it repeatedly takes the
+ * node at the tail of the prefetch queue, reduces it with foundLocal(),
+ * sends one prefetch request per remaining machine pile and then advances
+ * the queue tail. */
+void *transPrefetch(void *t) {
+  for(;;) {
+    /* read from prefetch queue */
+    void *node = gettail();
+    /* Drop what is available locally, group the rest by remote machine */
+    prefetchpile_t *pilehead = foundLocal(node);
+    prefetchpile_t *cur;
+
+    /* Send Prefetch Request to every machine in the pile list */
+    for(cur = pilehead; cur != NULL; cur = cur->next) {
+      int sd = getSock2(transPrefetchSockPool, cur->mid);
+      sendPrefetchReq(cur, sd);
+    }
+
+    /* Release socket */
+    // freeSock(transPrefetchSockPool, pilehead->mid, sd);
+
+    if (pilehead != NULL) {
+      /* Deallocated pilehead */
+      mcdealloc(pilehead);
+    }
+
+    // Deallocate the prefetch queue pile node
+    inctail();
+  }
+}
+
+void sendPrefetchReqnew(prefetchpile_t *mcpilenode, int sd) {
+  /* Builds one buffer holding TRANS_PREFETCH, one record per object pile
+   * (record length in bytes, oid, myIpAddr, offsets) and a terminating
+   * int -1, then sends it over sd in a single send_data() call.
+   * NOTE(review): the first int of each record is the byte length here,
+   * while sendPrefetchReq() writes numoffset in that position - confirm
+   * which one the receiver expects. */
+  objpile_t *tmp;
+  const int endmarker = -1;
+
+  int size=sizeof(char)+sizeof(int);
+  for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
+    size += sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
+  }
+
+  char buft[size];
+  char *buf=buft;
+  *buf=TRANS_PREFETCH;
+  buf+=sizeof(char);
+
+  /* After the one-byte control value nothing in the buffer is aligned
+   * for int access, so every field is copied with memcpy instead of being
+   * stored through a cast pointer. */
+  for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
+    int len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
+    unsigned int oid = tmp->oid;
+    unsigned int self = myIpAddr;
+
+    memcpy(buf, &len, sizeof(int));
+    buf+=sizeof(int);
+    memcpy(buf, &oid, sizeof(unsigned int));
+    buf+=sizeof(unsigned int);
+    memcpy(buf, &self, sizeof(unsigned int));
+    buf+=sizeof(unsigned int);
+    memcpy(buf, tmp->offset, tmp->numoffset*sizeof(short));
+    buf+=tmp->numoffset*sizeof(short);
+  }
+  memcpy(buf, &endmarker, sizeof(int));
+  send_data(sd, buft, size);
+  return;
+}
+
+void sendPrefetchReq(prefetchpile_t *mcpilenode, int sd) {
+  /* Sends a prefetch request for one machine pile over sd: the
+   * TRANS_PREFETCH control byte, then one message per object pile
+   * (numoffset, oid, myIpAddr, offsets), then an int -1 as end marker. */
+  int len, endpair;
+  char control;
+  objpile_t *tmp;
+
+  /* Send TRANS_PREFETCH control message */
+  control = TRANS_PREFETCH;
+  send_data(sd, &control, sizeof(char));
+
+  /* Send Oids and offsets in pairs */
+  for(tmp = mcpilenode->objpiles; tmp != NULL; tmp = tmp->next) {
+    int numoffset = tmp->numoffset;
+    unsigned int oid = tmp->oid;
+    unsigned int self = myIpAddr;
+
+    len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
+    char oidnoffset[len];
+    char *buf=oidnoffset;
+
+    /* A char array carries no alignment guarantee for int access, so the
+     * fields are copied in with memcpy rather than stored through casts */
+    memcpy(buf, &numoffset, sizeof(int));
+    buf+=sizeof(int);
+    memcpy(buf, &oid, sizeof(unsigned int));
+    buf+=sizeof(unsigned int);
+    memcpy(buf, &self, sizeof(unsigned int));
+    buf += sizeof(unsigned int);
+    memcpy(buf, tmp->offset, (tmp->numoffset)*sizeof(short));
+    send_data(sd, oidnoffset, len);
+  }
+
+  /* Send a special char -1 to represent the end of sending oids + offset pair to remote machine */
+  endpair = -1;
+  send_data(sd, &endpair, sizeof(int));
+
+  return;
+}
+
+int getPrefetchResponse(int sd) {
+  /* Reads one prefetch response from sd: an int total length, then a
+   * control byte, an oid and (for OBJECT_FOUND) the object itself.
+   * A found object is copied into the prefetch cache and entered into the
+   * prefetch hash table unless a newer version is already there; waiters
+   * on pflookup.cond are then woken.
+   * Returns 0 normally, -1 when the length is too small to hold the
+   * control byte and oid or when the prefetch cache allocation fails. */
+  int length = 0, size = 0;
+  char control;
+  unsigned int oid;
+  void *modptr, *oldptr;
+  const int minpayload = (int)(sizeof(char) + sizeof(unsigned int));
+
+  recv_data((int)sd, &length, sizeof(int));
+
+  /* The length comes off the wire (and stays 0 if nothing was received).
+   * Both branches below read a control byte and an oid from the payload,
+   * so reject anything shorter before sizing the buffer with it. */
+  if (length < (int)sizeof(int) + minpayload) {
+    printf("Error: malformed prefetch response length %d, %s, %d\n", length, __FILE__, __LINE__);
+    return -1;
+  }
+  size = length - (int)sizeof(int);
+  char recvbuffer[size];
+
+  recv_data((int)sd, recvbuffer, size);
+  control = *((char *) recvbuffer);
+  if(control == OBJECT_FOUND) {
+    /* the oid sits at byte offset 1: copy it out instead of doing a
+     * misaligned read */
+    memcpy(&oid, recvbuffer + sizeof(char), sizeof(unsigned int));
+    size = size - (sizeof(char) + sizeof(unsigned int));
+    pthread_mutex_lock(&prefetchcache_mutex);
+    if ((modptr = prefetchobjstrAlloc(size)) == NULL) {
+      printf("Error: objstrAlloc error for copying into prefetch cache %s, %d\n", __FILE__, __LINE__);
+      pthread_mutex_unlock(&prefetchcache_mutex);
+      return -1;
+    }
+    pthread_mutex_unlock(&prefetchcache_mutex);
+    memcpy(modptr, recvbuffer + sizeof(char) + sizeof(unsigned int), size);
+    STATUS(modptr)=0;
+
+    /* Insert the oid and its address into the prefetch hash lookup table */
+    /* Do a version comparison if the oid exists */
+    if((oldptr = prehashSearch(oid)) != NULL) {
+      /* If older version then update with new object ptr */
+      if(((objheader_t *)oldptr)->version <= ((objheader_t *)modptr)->version) {
+        prehashRemove(oid);
+        prehashInsert(oid, modptr);
+      }
+    } else { /* Else add the object ptr to hash table*/
+      prehashInsert(oid, modptr);
+    }
+    /* Lock the Prefetch Cache look up table*/
+    pthread_mutex_lock(&pflookup.lock);
+    /* Broadcast signal on prefetch cache condition variable */
+    pthread_cond_broadcast(&pflookup.cond);
+    /* Unlock the Prefetch Cache look up table*/
+    pthread_mutex_unlock(&pflookup.lock);
+  } else if(control == OBJECT_NOT_FOUND) {
+    memcpy(&oid, recvbuffer + sizeof(char), sizeof(unsigned int));
+    /* TODO: For each object not found query DHT for new location and retrieve the object */
+    /* Throw an error */
+    //printf("OBJECT %x NOT FOUND.... THIS SHOULD NOT HAPPEN...TERMINATE PROGRAM\n", oid);
+    //  exit(-1);
+  } else {
+    printf("Error: in decoding the control value %d, %s, %d\n",control, __FILE__, __LINE__);
+  }
+
+  return 0;
+}
+
+unsigned short getObjType(unsigned int oid) {
+  /* Returns TYPE() of the object oid. The machine table is tried first,
+   * then (with CACHE) the prefetch table; if both miss, the object is
+   * fetched with a READ_REQUEST from the machine given by lhashSearch().
+   * With CACHE the fetched copy is kept in the prefetch cache, without it
+   * the copy is freed again after the type has been read.
+   * Exits the program on OBJECT_NOT_FOUND; returns 0 when the temporary
+   * buffer cannot be allocated (non-CACHE build). */
+  objheader_t *objheader;
+
+  if ((objheader = (objheader_t *) mhashSearch(oid)) == NULL) {
+#ifdef CACHE
+    if ((objheader = (objheader_t *) prehashSearch(oid)) == NULL) {
+#endif
+    unsigned int mid = lhashSearch(oid);
+    int sd = getSock2(transReadSockPool, mid);
+    char remotereadrequest[sizeof(char)+sizeof(unsigned int)];
+    remotereadrequest[0] = READ_REQUEST;
+    /* byte offset 1 is not aligned for an unsigned int store */
+    memcpy(&remotereadrequest[1], &oid, sizeof(oid));
+    send_data(sd, remotereadrequest, sizeof(remotereadrequest));
+
+    /* Read response from the Participant */
+    char control;
+    recv_data(sd, &control, sizeof(char));
+
+    if (control==OBJECT_NOT_FOUND) {
+      printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
+      fflush(stdout);
+      exit(-1);
+    } else {
+      /* Read object if found into local cache */
+      int size;
+      recv_data(sd, &size, sizeof(int));
+#ifdef CACHE
+      pthread_mutex_lock(&prefetchcache_mutex);
+      if ((objheader = prefetchobjstrAlloc(size)) == NULL) {
+        printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
+        /* do not take the lock with us when this thread exits */
+        pthread_mutex_unlock(&prefetchcache_mutex);
+        pthread_exit(NULL);
+      }
+      pthread_mutex_unlock(&prefetchcache_mutex);
+      recv_data(sd, objheader, size);
+      prehashInsert(oid, objheader);
+      return TYPE(objheader);
+#else
+      char *buffer;
+      if((buffer = calloc(1, size)) == NULL) {
+        printf("%s() Calloc Error %s at line %d\n", __func__, __FILE__, __LINE__);
+        fflush(stdout);
+        return 0;
+      }
+      recv_data(sd, buffer, size);
+      objheader = (objheader_t *)buffer;
+      unsigned short type = TYPE(objheader);
+      free(buffer);
+      return type;
+#endif
+    }
+#ifdef CACHE
+    }
+#endif
+  }
+  return TYPE(objheader);
+}
+
+int startRemoteThread(unsigned int oid, unsigned int mid) {
+  /* Opens a fresh TCP connection to mid:LISTEN_PORT, sends
+   * START_REMOTE_THREAD followed by oid and closes the connection.
+   * Returns 0 when the request was handed to send_data(), -1 when the
+   * socket could not be created or connected. */
+  int sock;
+  struct sockaddr_in remoteAddr;
+  char msg[1 + sizeof(unsigned int)];
+  int status = 0;  /* previously left uninitialized on the success path */
+
+  if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+    perror("startRemoteThread():socket()");
+    return -1;
+  }
+
+  bzero(&remoteAddr, sizeof(remoteAddr));
+  remoteAddr.sin_family = AF_INET;
+  remoteAddr.sin_port = htons(LISTEN_PORT);
+  remoteAddr.sin_addr.s_addr = htonl(mid);
+
+  if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
+    printf("startRemoteThread():error %d connecting to %s:%d\n", errno,
+           inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
+    status = -1;
+  } else {
+    msg[0] = START_REMOTE_THREAD;
+    /* msg+1 is not aligned for an unsigned int store */
+    memcpy(&msg[1], &oid, sizeof(oid));
+    send_data(sock, msg, 1 + sizeof(unsigned int));
+  }
+
+  close(sock);
+  return status;
+}
+
+//TODO: when reusing oids, make sure they are not already in use!
+/* Last oid handed out; starts at 0xFFFFFFFF so that the first step of 2
+ * wraps around to 1. */
+static unsigned int id = 0xFFFFFFFF;
+
+/* Returns the next oid: the previous one plus 2, or (oidMin | 1) whenever
+ * that step leaves the range [oidMin, oidMax]. */
+unsigned int getNewOID(void) {
+  unsigned int candidate = id + 2;
+
+  if (candidate < oidMin || candidate > oidMax)
+    candidate = (oidMin | 1);
+
+  id = candidate;
+  return candidate;
+}
+
+/* Last transaction id handed out; starts at 0xFFFFFFFF so that the first
+ * increment wraps around to 0. */
+static unsigned int tid = 0xFFFFFFFF;
+
+/* Returns the next transaction id: the previous one plus 1, or
+ * (transIDMin | 1) whenever that step leaves [transIDMin, transIDMax]. */
+unsigned int getNewTransID(void) {
+  unsigned int candidate = tid + 1;
+
+  if (candidate < transIDMin || candidate > transIDMax)
+    candidate = (transIDMin | 1);
+
+  tid = candidate;
+  return candidate;
+}
+
+int processConfigFile() {
+  /* Reads CONFIG_FILENAME, which lists host IP addresses separated by
+   * whitespace ('#' starts a comment), registers each one with addHost(),
+   * locates this machine in the list and derives its oid and transaction
+   * id ranges from its position.
+   * Returns 0 on success, -1 when the file cannot be opened, contains a
+   * token that is not an address, lists no host, or does not list this
+   * machine's own address. */
+  FILE *configFile;
+  const int maxLineLength = 200;
+  char lineBuffer[maxLineLength];
+  char *token;
+  const char *delimiters = " \t\n";
+  char *commentBegin;
+  in_addr_t tmpAddr;
+
+  configFile = fopen(CONFIG_FILENAME, "r");
+  if (configFile == NULL) {
+    printf("error opening %s:\n", CONFIG_FILENAME);
+    perror("");
+    return -1;
+  }
+
+  numHostsInSystem = 0;
+  sizeOfHostArray = 8;
+  hostIpAddrs = calloc(sizeOfHostArray, sizeof(unsigned int));
+#ifdef RECOVERY
+  liveHosts = calloc(sizeOfHostArray, sizeof(unsigned int));
+  locateObjHosts = calloc(sizeOfHostArray*2, sizeof(unsigned int));
+  liveHostsValid = 0;
+#endif
+
+  while(fgets(lineBuffer, maxLineLength, configFile) != NULL) {
+    /* cut the line at the first '#' */
+    commentBegin = strchr(lineBuffer, '#');
+    if (commentBegin != NULL)
+      *commentBegin = '\0';
+    token = strtok(lineBuffer, delimiters);
+    while (token != NULL) {
+      tmpAddr = inet_addr(token);
+      if ((int)tmpAddr == -1) {
+        printf("error in %s: bad token:%s\n", CONFIG_FILENAME, token);
+        fclose(configFile);
+        return -1;
+      } else
+        addHost(htonl(tmpAddr));
+      token = strtok(NULL, delimiters);
+    }
+  }
+
+  fclose(configFile);
+
+  if (numHostsInSystem < 1) {
+    printf("error in %s: no IP Adresses found\n", CONFIG_FILENAME);
+    return -1;
+  }
+#ifdef MAC
+  myIpAddr = getMyIpAddr("en1");
+#else
+  myIpAddr = getMyIpAddr("eth0");
+#endif
+  myIndexInHostArray = findHost(myIpAddr);
+  /* Check the index before it is used: findHost() returns -1 when this
+   * machine is not listed, and liveHosts[-1] must not be written */
+  if (myIndexInHostArray == -1) {
+    printf("error in %s: IP Address of eth0 not found\n", CONFIG_FILENAME);
+    return -1;
+  }
+#ifdef RECOVERY
+  liveHosts[myIndexInHostArray] = 1;
+  //locateObjHosts[myIndexInHostArray] = myIpAddr;
+#endif
+  /* Split the 32-bit oid space into one block per host; the last host
+   * takes whatever remains up to 0xFFFFFFFF */
+  oidsPerBlock = (0xFFFFFFFF / numHostsInSystem) + 1;
+  oidMin = oidsPerBlock * myIndexInHostArray;
+  if (myIndexInHostArray == numHostsInSystem - 1)
+    oidMax = 0xFFFFFFFF;
+  else
+    oidMax = oidsPerBlock * (myIndexInHostArray + 1) - 1;
+
+  transIDMin = oidMin;
+  transIDMax = oidMax;
+
+  return 0;
+}
+
+unsigned int getDuplicatedPrimaryMachine(unsigned int mid) {
+  /* Scans the (primary, backup) pairs in locateObjHosts for the first
+   * pair whose backup entry equals mid and returns that pair's primary
+   * entry. Returns (unsigned int)-1 when no pair matches.
+   * NOTE(review): unlike getPrimaryMachine()/getBackupMachine() this read
+   * does not take liveHosts_mutex - confirm that is intended. */
+  int pair = 0;
+
+  while(pair < numHostsInSystem) {
+    if(locateObjHosts[(pair*2)+1] == mid)
+      return locateObjHosts[pair*2];
+    pair++;
+  }
+  return -1;
+}
+
+unsigned int getPrimaryMachine(unsigned int mid) {
+  /* Returns the even ("primary") locateObjHosts entry of the pair that
+   * belongs to mid's index in the host array, read under liveHosts_mutex.
+   * NOTE(review): findHost() yields -1 for an unknown mid, which would
+   * make the slot negative - callers presumably pass known hosts only. */
+  int slot = 2*findHost(mid);
+  unsigned int primary;
+
+  pthread_mutex_lock(&liveHosts_mutex);
+  primary = locateObjHosts[slot];
+  pthread_mutex_unlock(&liveHosts_mutex);
+
+  return primary;
+}
+
+unsigned int getBackupMachine(unsigned int mid) {
+  /* Returns the odd ("backup") locateObjHosts entry of the pair that
+   * belongs to mid's index in the host array, read under liveHosts_mutex.
+   * NOTE(review): findHost() yields -1 for an unknown mid, which would
+   * make the slot negative - callers presumably pass known hosts only. */
+  int slot = 2*findHost(mid)+1;
+  unsigned int backup;
+
+  pthread_mutex_lock(&liveHosts_mutex);
+  backup = locateObjHosts[slot];
+  pthread_mutex_unlock(&liveHosts_mutex);
+
+  return backup;
+}
+
+// updates the leader's liveHostArray and locateObj
+/* Probes every other host with a RESPOND_LIVE request (up to 5 socket
+ * attempts each) and records the answer in liveHosts[]. A host counts as
+ * live only when it answers LIVE. numLiveHostsInSystem is set to the
+ * number of live hosts, this machine included. liveHostsValid is cleared
+ * at the start and not set again here. */
+void updateLiveHosts() {
+#ifdef DEBUG
+  printf("%s-> Entering updateLiveHosts\n", __func__);
+#endif
+  // update everyone's list
+  liveHostsValid = 0;
+  //int *tmpLiveHosts = calloc(sizeOfHostArray, sizeof(unsigned int));
+  //foreach in hostipaddrs, ping -> update list of livemachines
+  //socket connection?
+
+  //liveHosts lock here
+  int sd = 0, i, j, tmpNumLiveHosts = 0;
+  for(i = 0; i < numHostsInSystem; i++) {
+    if(i == myIndexInHostArray)
+    {
+      tmpNumLiveHosts++;
+      continue;
+    }
+    for(j = 0; j < 5; j++) { // hard define num of retries
+      if((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
+        /* the format has exactly two conversions: the host and the
+         * attempt number (an extra __func__ argument used to shift them) */
+        printf("updateLiveHosts(): Cannot create socket connection to [%s], attempt %d\n", midtoIPString(hostIpAddrs[i]), j);
+        usleep(1000);
+        if(j == 4)
+          liveHosts[i] = 0;
+        continue;
+      }
+      char liverequest[sizeof(char)];
+      liverequest[0] = RESPOND_LIVE;
+
+      send_data(sd, &liverequest[0], sizeof(liverequest));
+      char response = 0;
+      recv_data(sd, &response, sizeof(response));
+
+      //try to send msg
+      //if timeout, dead host
+      printf("YES received %d\n", response);
+      if(response == LIVE) {
+        printf("must enter here\n");
+        liveHosts[i] = 1;
+        tmpNumLiveHosts++;
+        //locateObjHosts[i*2] = hostIpAddrs[i];
+      }
+      else {
+        printf("or here\n");
+        liveHosts[i] = 0;
+        timeoutFlag = 0;
+      }
+      break;
+
+    }
+    if(liveHosts[i] == 0)
+      printf("updateLiveHosts(): cannot make connection to machine %s\n", midtoIPString(hostIpAddrs[i]));
+  }
+  numLiveHostsInSystem = tmpNumLiveHosts;
+  printf("numLiveHostsInSystem:%d\n", numLiveHostsInSystem);
+  //have updated list of live machines
+#ifdef DEBUG
+  printf("%s-> Exiting updateLiveHosts\n", __func__);
+  printHostsStatus();
+#endif
+}
+
+int getNumLiveHostsInSystem() {
+  /* Counts the non-zero entries among the first numHostsInSystem slots
+   * of liveHosts. */
+  int alive = 0;
+  int h;
+
+  for(h = 0; h < numHostsInSystem; h++)
+    alive += liveHosts[h] ? 1 : 0;
+
+  return alive;
+}
+
+int updateLiveHostsCommit() {
+  /* Publishes this machine's liveHosts[] and locateObjHosts[] tables.
+   * The message is UPDATE_LIVE_HOSTS, then numHostsInSystem ints
+   * (liveHosts), then 2*numHostsInSystem unsigned ints (locateObjHosts).
+   * It is sent to every host marked live except this machine.
+   * Sets liveHostsValid to 1 and returns 0 on success; returns -1 as soon
+   * as a socket to a live host cannot be obtained. */
+  int sd = 0, i;
+
+  char updaterequest[sizeof(char)+sizeof(int)*numHostsInSystem+sizeof(unsigned int)*(numHostsInSystem*2)];
+  char *cursor = updaterequest;
+
+  *cursor = UPDATE_LIVE_HOSTS;
+  cursor += sizeof(char);
+
+  /* The fields start at byte offset 1, so they are copied with memcpy
+   * (no misaligned stores) and advanced by sizeof instead of a literal 4 */
+  for(i = 0; i < numHostsInSystem; i++) {
+    int live = liveHosts[i];
+    memcpy(cursor, &live, sizeof(int));
+    cursor += sizeof(int);
+  }
+
+  for(i = 0; i < numHostsInSystem*2; i++) {
+    unsigned int host = locateObjHosts[i];
+    memcpy(cursor, &host, sizeof(unsigned int));
+    cursor += sizeof(unsigned int);
+  }
+
+  //for each machine send data
+  /* Start at index 0: this machine is skipped explicitly below, so
+   * starting at 1 only left a live host 0 without the update */
+  for(i = 0; i < numHostsInSystem; i++) {
+    if(i == myIndexInHostArray)
+      continue;
+    if(liveHosts[i] == 1) {
+      if((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
+        printf("updateLiveHosts(): socket create error, attempt %d\n", i);
+        return -1;
+      }
+      send_data(sd, updaterequest, sizeof(updaterequest));
+    }
+  }
+  liveHostsValid = 1;
+  printHostsStatus();
+  return 0;
+}
+
+/*void updateLocateObjHosts(unsigned int failedmid) {
+ int failedmidIndex = findHost(failedmid);
+ int i = 0, validIndex = 0;
+
+ for(; i < numHostsInSystem; i++) {
+ if(locateObjHosts[(i*2)] == failedmid) {
+ while(liveHosts[(i+validIndex)%numHostsInSystem] == 0)
+ validIndex++;
+ locateObjHosts[(i*2)] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
+ validIndex++;
+ while(liveHosts[(i+validIndex)%numHostsInSystem] == 0)
+ validIndex++;
+ locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
+ }
+ else if(locateObjHosts[(i*2)+1] == failedmid) {
+ while(liveHosts[(i+validIndex)%numHostsInSystem] == 0)
+ validIndex++;
+ locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
+ validIndex = 0;
+ }
+ }
+}*/
+
+void setLocateObjHosts() {
+  /* Recomputes the (primary, backup) pair of every host slot. For slot i
+   * the primary is the first live host found at index i, i+1, ...
+   * (wrapping around), and the backup is the next live host found after
+   * the primary in the same circular order.
+   * NOTE(review): both scans only stop at a live host, so at least one
+   * liveHosts entry has to be non-zero. */
+  int slot;
+
+  //check num hosts even valid first
+
+  for(slot = 0; slot < numHostsInSystem; slot++) {
+    int validIndex = 0;  /* distance from slot, restarted for every slot */
+
+#ifdef DEBUG
+    printf("%s-> i:%d\n", __func__, slot);
+#endif
+
+    /* primary: first live host at or after the slot */
+    for(; liveHosts[(slot+validIndex)%numHostsInSystem] == 0; validIndex++)
+      ;
+    locateObjHosts[slot*2] = hostIpAddrs[(slot+validIndex)%numHostsInSystem];
+#ifdef DEBUG
+    printf("%s-> locateObjHosts[%d]:%s\n", __func__, slot*2, midtoIPString(locateObjHosts[(slot*2)]));
+#endif
+
+    /* backup: next live host behind the primary */
+    for(validIndex++; liveHosts[(slot+validIndex)%numHostsInSystem] == 0; validIndex++)
+      ;
+#ifdef DEBUG
+    printf("%s-> validIndex:%d, this mid is: [%s]\n", __func__, validIndex, midtoIPString(hostIpAddrs[(slot+validIndex)%numHostsInSystem]));
+#endif
+    locateObjHosts[(slot*2)+1] = hostIpAddrs[(slot+validIndex)%numHostsInSystem];
+
+#ifdef DEBUG
+    printf("%s-> locateObjHosts[%d]:%s\n", __func__, slot*2+1, midtoIPString(locateObjHosts[(slot*2)+1]));
+#endif
+  }
+}
+
+//debug function
+/* With DEBUG defined, prints for every host whether it is marked LIVE or
+ * DEAD together with its locateObjHosts pair. Without DEBUG nothing is
+ * printed. */
+void printHostsStatus() {
+  int h;
+#ifdef DEBUG
+  printf("%s-> *printing live machines and backups*\n", __func__);
+#endif
+  for(h = 0; h < numHostsInSystem; h++) {
+    if(!liveHosts[h]) {
+#ifdef DEBUG
+      printf("%s-> [%s]: DEAD\n", __func__, midtoIPString(hostIpAddrs[h]));
+#endif
+    }
+    else {
+#ifdef DEBUG
+      printf("%s-> [%s]: LIVE\n", __func__, midtoIPString(hostIpAddrs[h]));
+#endif
+    }
+#ifdef DEBUG
+    printf("%s-> original:\t[%s]\n", __func__, midtoIPString(locateObjHosts[h*2]));
+    printf("%s-> backup:\t[%s]\n", __func__, midtoIPString(locateObjHosts[h*2+1]));
+#endif
+  }
+}
+
+int allHostsLive() {
+  /* Returns 1 when every one of the first numHostsInSystem liveHosts
+   * entries is non-zero, 0 as soon as one is zero. */
+  int h = 0;
+
+  while(h < numHostsInSystem && liveHosts[h])
+    h++;
+
+  return (h == numHostsInSystem) ? 1 : 0;
+}
+
+void duplicateLostObjects(unsigned int mid){
+  /* Re-creates the redundancy lost with host mid.
+   * Before the host table is recomputed, the backup of mid and the host
+   * whose backup mid was are looked up. After setLocateObjHosts() these
+   * two machines are asked (DUPLICATE_ORIGINAL / DUPLICATE_BACKUP) to
+   * copy their objects to the machine now assigned to mid's slot; when
+   * one of them is this machine the copy is started locally instead.
+   * A socket failure is only reported for the first request, but exits
+   * the program for the second one. */
+
+#ifdef DEBUG
+  printf("%s-> Start, mid: [%s]\n", __func__, midtoIPString(mid));
+#endif
+
+  //this needs to be changed.
+  unsigned int backupMid = getBackupMachine(mid);
+  unsigned int originalMid = getDuplicatedPrimaryMachine(mid);
+
+#ifdef DEBUG
+  printf("%s-> backupMid: [%s], ", __func__, midtoIPString(backupMid));
+  printf("originalMid: [%s]\n", midtoIPString(originalMid));
+#endif
+
+  setLocateObjHosts();
+  printHostsStatus();
+
+  //connect to these machines
+  //go through their object store copying necessary (in a transaction)
+  //transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
+  int sd = 0;
+
+  if(originalMid == myIpAddr) {
+    originalMid = getPrimaryMachine(mid);
+    printf("originalMid: [%s]\n", midtoIPString(originalMid));
+    duplicateLocalOriginalObjects(originalMid);
+  }
+  else if((sd = getSock2WithLock(transRequestSockPool, originalMid)) < 0) {
+    /* there is no retry loop here; an uninitialized counter used to be
+     * printed as the attempt number */
+    printf("updateLiveHosts(): socket create error, attempt %d\n", 0);
+    //usleep(1000);
+  }
+  else {
+    char duperequest;
+    duperequest = DUPLICATE_ORIGINAL;
+    send_data(sd, &duperequest, sizeof(char));
+#ifdef DEBUG
+    printf("%s-> Sent DUPLICATE_ORIGINAL request\n", __func__);
+#endif
+    originalMid = getPrimaryMachine(mid);
+    printf("originalMid: [%s]\n", midtoIPString(originalMid));
+    send_data(sd, &originalMid, sizeof(unsigned int));
+#ifdef DEBUG
+    printf("%s-> Sent originalMid\n", __func__);
+#endif
+    char response;
+    recv_data_block(sd, &response, sizeof(char));
+    printf("YES! Received %d\n", response);
+  }
+
+  if(backupMid == myIpAddr) {
+    backupMid = getBackupMachine(mid);
+    duplicateLocalBackupObjects(backupMid);
+  }
+  else if((sd = getSock2WithLock(transRequestSockPool, backupMid)) < 0) {
+    printf("updateLiveHosts(): socket create error, attempt %d\n", 0);
+    exit(1);
+  }
+  else {
+    char duperequest;
+    duperequest = DUPLICATE_BACKUP;
+    send_data(sd, &duperequest, sizeof(char));
+#ifdef DEBUG
+    printf("%s-> Sent DUPLICATE_BACKUP request\n", __func__);
+#endif
+    backupMid = getBackupMachine(mid);
+    send_data(sd, &backupMid, sizeof(unsigned int));
+#ifdef DEBUG
+    printf("%s-> Sent backupMid\n", __func__);
+#endif
+
+    char response;
+    recv_data_block(sd, &response, sizeof(char));
+    printf("YES! Received %d\n", response);
+  }
+
+#ifdef DEBUG
+  printf("%s-> End\n", __func__);
+#endif
+}
+
+void duplicateLocalBackupObjects(unsigned int mid) {
+  /* Collects the local duplicates selected by mhashGetDuplicate(.., 1)
+   * and ships them to mid: RECEIVE_DUPES, the collected bytes, then one
+   * response byte is read back. Returns without sending anything when no
+   * socket to mid can be obtained. */
+  int tempsize, sd;
+  char *dupeptr, ctrl, response;
+
+#ifdef DEBUG
+  printf("%s-> Start; backup mid:%s\n", __func__, midtoIPString(mid));
+#endif
+  //copy code from dstmserver here
+  tempsize = mhashGetDuplicate(&dupeptr, 1);
+
+  printf("tempsize:%d, dupeptrfirstvalue:%d\n", tempsize, *((unsigned int *)(dupeptr)));
+  //send control and dupes after
+  ctrl = RECEIVE_DUPES;
+  if((sd = getSockWithLock(transRequestSockPool, mid)) < 0) {
+    printf("duplicatelocalbackup: socket create error\n");
+    /* no usable descriptor: sending on it and returning it to the pool
+     * would both operate on an invalid socket.
+     * NOTE(review): ownership of dupeptr is not visible here - confirm
+     * whether it has to be released on this path. */
+    return;
+  }
+
+  printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
+  send_data(sd, &ctrl, sizeof(char));
+  send_data(sd, dupeptr, tempsize);
+  recv_data(sd, &response, sizeof(char));
+  if(response != DUPLICATION_COMPLETE) {
+    //fail message
+  }
+
+  freeSockWithLock(transRequestSockPool, mid, sd);
+#ifdef DEBUG
+  printf("%s-> End\n", __func__);
+#endif
+
+}
+
+void duplicateLocalOriginalObjects(unsigned int mid) {
+  /* Collects the local duplicates selected by mhashGetDuplicate(.., 0)
+   * and ships them to mid: RECEIVE_DUPES, the collected bytes, then one
+   * response byte is read back. Returns without sending anything when no
+   * socket to mid can be obtained. */
+  int tempsize, sd;
+  char *dupeptr, ctrl, response;
+
+#ifdef DEBUG
+  printf("%s-> Start\n", __func__);
+#endif
+  //copy code fom dstmserver here
+
+  tempsize = mhashGetDuplicate(&dupeptr, 0);
+
+  //send control and dupes after
+  ctrl = RECEIVE_DUPES;
+
+  if((sd = getSockWithLock(transRequestSockPool, mid)) < 0) {
+    printf("DUPLICATE_ORIGINAL: socket create error\n");
+    /* no usable descriptor: sending on it and returning it to the pool
+     * would both operate on an invalid socket.
+     * NOTE(review): ownership of dupeptr is not visible here - confirm
+     * whether it has to be released on this path. */
+    return;
+  }
+  printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
+
+  send_data(sd, &ctrl, sizeof(char));
+  send_data(sd, dupeptr, tempsize);
+
+  recv_data(sd, &response, sizeof(char));
+  if(response != DUPLICATION_COMPLETE) {
+    //fail message
+  }
+  freeSockWithLock(transRequestSockPool, mid, sd);
+
+#ifdef DEBUG
+  printf("%s-> End\n", __func__);
+#endif
+
+}
+
+/* Registers `hostIp` in the host tables unless it is already known.
+ *
+ * Three parallel tables are maintained:
+ *   hostIpAddrs[i]       - address of host i
+ *   liveHosts[i]         - liveness flag of host i (initialised to 0 here)
+ *   locateObjHosts[2*i]  - two slots per host; only the first is set here
+ *
+ * When the tables are full they are doubled in capacity. locateObjHosts
+ * holds two entries per host, so 2 * numHostsInSystem elements have to be
+ * carried over on growth (the original copied only numHostsInSystem and
+ * lost the upper half of the table).
+ *
+ * NOTE(review): this assumes locateObjHosts was initially allocated with at
+ * least 2 * sizeOfHostArray elements, matching the growth path below.
+ * NOTE(review): calloc() results are not checked, as in the original. */
+void addHost(unsigned int hostIp) {
+  unsigned int *tmpArray;
+  int *tmpliveHostsArray;
+  unsigned int *tmplocateObjHostsArray;
+
+  if (findHost(hostIp) != -1)
+    return;
+
+  if (numHostsInSystem == sizeOfHostArray) {
+    tmpArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
+    memcpy(tmpArray, hostIpAddrs, sizeof(unsigned int) * numHostsInSystem);
+    free(hostIpAddrs);
+    hostIpAddrs = tmpArray;
+
+    tmpliveHostsArray = calloc(sizeOfHostArray * 2, sizeof(*tmpliveHostsArray));
+    memcpy(tmpliveHostsArray, liveHosts, sizeof(*tmpliveHostsArray) * numHostsInSystem);
+    free(liveHosts);
+    liveHosts = tmpliveHostsArray;
+
+    /* Two slots per host: copy both halves of every existing pair */
+    tmplocateObjHostsArray = calloc(sizeOfHostArray * 2 * 2, sizeof(unsigned int));
+    memcpy(tmplocateObjHostsArray, locateObjHosts, sizeof(unsigned int) * numHostsInSystem * 2);
+    free(locateObjHosts);
+    locateObjHosts = tmplocateObjHostsArray;
+
+    sizeOfHostArray *= 2;
+  }
+
+  hostIpAddrs[numHostsInSystem] = hostIp;
+  liveHosts[numHostsInSystem] = 0;
+  locateObjHosts[numHostsInSystem*2] = hostIp;
+
+  numHostsInSystem++;
+  return;
+}
+
+/* Linear search of hostIpAddrs for `hostIp`.
+ * Returns the index of the first matching entry among the first
+ * numHostsInSystem slots, or -1 when the address is not present. */
+int findHost(unsigned int hostIp) {
+  int idx = 0;
+
+  while (idx < numHostsInSystem) {
+    if (hostIpAddrs[idx] == hostIp) {
+      return idx;
+    }
+    idx++;
+  }
+
+  /* no entry matched */
+  return -1;
+}
+
+/* Sends a notification request on behalf of the calling thread, which then
+ * blocks until it is signalled that one of the watched objects changed
+ * version.
+ *
+ * oidarry / versionarry : `numoid` object ids and the versions currently known
+ * numoid                : number of entries in both arrays
+ *
+ * The machine to contact is looked up from oidarry[0] only. A notifydata_t
+ * record is stored in the notify hash under a fresh thread id, the request
+ * is sent, and the caller waits on the record's condition variable
+ * (signalled by threadNotify()).
+ *
+ * Wire format: THREAD_NOTIFY_REQUEST byte, numoid, oid[numoid],
+ * version[numoid], myIpAddr, threadid.
+ *
+ * Returns the (zero) status of notifyhashInsert() after the wait completes,
+ * or -1 on any failure. Every error path now returns -1 and releases the
+ * socket; the original had a bare `return;` and leaked the descriptor.
+ *
+ * NOTE(review): the static threadid counter is incremented without a lock.
+ * NOTE(review): pthread_cond_wait() is used without a predicate loop, so a
+ * spurious wakeup is treated as a notification.
+ * NOTE(review): ndata is freed here but never removed from the notify hash
+ * in this function; confirm that removal happens elsewhere. */
+int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid) {
+  int sock, i;
+  struct sockaddr_in remoteAddr;
+  char msg[1 + numoid * (sizeof(unsigned short) + sizeof(unsigned int)) + 3 * sizeof(unsigned int)];
+  int status, size;
+  unsigned int oid, mid, mytid, self;
+  static unsigned int threadid = 0;
+  notifydata_t *ndata;
+
+  oid = oidarry[0];
+  if((mid = lhashSearch(oid)) == 0) {
+    printf("Error: %s() No such machine found for oid =%x\n",__func__, oid);
+    return -1;
+  }
+
+  if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+    perror("reqNotify():socket()");
+    return -1;
+  }
+
+  bzero(&remoteAddr, sizeof(remoteAddr));
+  remoteAddr.sin_family = AF_INET;
+  remoteAddr.sin_port = htons(LISTEN_PORT);
+  remoteAddr.sin_addr.s_addr = htonl(mid);
+
+  /* Generate unique threadid; snapshot it so the hash key and the id sent
+   * on the wire are guaranteed to be the same value */
+  mytid = ++threadid;
+
+  /* Save threadid, numoid, oidarray, versionarray, pthread_cond_variable for later processing */
+  if((ndata = calloc(1, sizeof(notifydata_t))) == NULL) {
+    printf("Calloc Error %s, %d\n", __FILE__, __LINE__);
+    close(sock);
+    return -1;
+  }
+  ndata->numoid = numoid;
+  ndata->threadid = mytid;
+  ndata->oidarry = oidarry;
+  ndata->versionarry = versionarry;
+  /* Initialise the synchronisation objects in place: copying an initialised
+   * mutex/condvar by assignment is not a supported way to create one */
+  pthread_cond_init(&(ndata->threadcond), NULL);
+  pthread_mutex_init(&(ndata->threadnotify), NULL);
+  if((status = notifyhashInsert(mytid, ndata)) != 0) {
+    printf("reqNotify(): Insert into notify hash table not successful %s, %d\n", __FILE__, __LINE__);
+    goto fail;
+  }
+
+  /* Send number of oids, oidarry, version array, machine id and threadid */
+  if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
+    printf("reqNotify():error %d connecting to %s:%d\n", errno,
+           inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
+    goto fail;
+  }
+
+  /* Build the message with memcpy: the fields are not naturally aligned
+   * inside msg, so storing through casted pointers is avoided */
+  msg[0] = THREAD_NOTIFY_REQUEST;
+  size = 1;
+  memcpy(msg + size, &numoid, sizeof(unsigned int));
+  size += sizeof(unsigned int);
+
+  /* Send array of oids */
+  for(i = 0; i < numoid; i++) {
+    memcpy(msg + size, &oidarry[i], sizeof(unsigned int));
+    size += sizeof(unsigned int);
+  }
+
+  /* Send array of version */
+  for(i = 0; i < numoid; i++) {
+    memcpy(msg + size, &versionarry[i], sizeof(unsigned short));
+    size += sizeof(unsigned short);
+  }
+
+  self = myIpAddr;
+  memcpy(msg + size, &self, sizeof(unsigned int));
+  size += sizeof(unsigned int);
+  memcpy(msg + size, &mytid, sizeof(unsigned int));
+  size += sizeof(unsigned int);
+
+  /* Take the lock before sending so the reply cannot be signalled before
+   * this thread is waiting */
+  pthread_mutex_lock(&(ndata->threadnotify));
+  send_data(sock, msg, size);
+  pthread_cond_wait(&(ndata->threadcond), &(ndata->threadnotify));
+  pthread_mutex_unlock(&(ndata->threadnotify));
+
+  pthread_cond_destroy(&(ndata->threadcond));
+  pthread_mutex_destroy(&(ndata->threadnotify));
+  free(ndata);
+  close(sock);
+  return status;
+
+fail:
+  pthread_cond_destroy(&(ndata->threadcond));
+  pthread_mutex_destroy(&(ndata->threadnotify));
+  free(ndata);
+  close(sock);
+  return -1;
+}
+
+/* Wakes the thread registered under `tid` when `oid` is one of the objects
+ * it is waiting on and `version` is newer than the version it recorded.
+ *
+ * Prints a diagnostic and returns without signalling when the thread id is
+ * unknown, the oid is not in the waiter's list, or the version has not
+ * advanced. With CACHE defined, a prefetch-cache entry found for the oid is
+ * removed before the waiter is signalled. */
+void threadNotify(unsigned int oid, unsigned short version, unsigned int tid) {
+  notifydata_t *waiter;
+  int k;
+  int slot = -1;  /* index of the last entry matching oid, -1 if none */
+
+  /* Look up the tid to find whose condition variable to signal */
+  waiter = notifyhashSearch(tid);
+  if(waiter == NULL) {
+    printf("threadnotify(): No such threadid is present %s, %d\n", __FILE__, __LINE__);
+    return;
+  }
+
+  /* Scan the whole list; a later duplicate wins, as before */
+  for(k = 0; k < waiter->numoid; k++) {
+    if(waiter->oidarry[k] == oid)
+      slot = k;
+  }
+  if(slot < 0) {
+    printf("threadNotify(): Oid not found %s, %d\n", __FILE__, __LINE__);
+    return;
+  }
+
+  if(version <= waiter->versionarry[slot]) {
+    printf("threadNotify(): New version %d has not changed since last version for oid = %d, %s, %d\n", version, oid, __FILE__, __LINE__);
+    return;
+  }
+
+#ifdef CACHE
+  /* Clear from prefetch cache and free thread related data structure */
+  {
+    void *cached = prehashSearch(oid);
+    if(cached != NULL) {
+      prehashRemove(oid);
+    }
+  }
+#endif
+  pthread_mutex_lock(&(waiter->threadnotify));
+  pthread_cond_signal(&(waiter->threadcond));
+  pthread_mutex_unlock(&(waiter->threadnotify));
+}
+
+/* Sends a THREAD_NOTIFY_RESPONSE for (`oid`, `version`) to every waiter in
+ * the list at *head, consuming the list as it goes: each node is unlinked
+ * and freed after its machine has been contacted.
+ *
+ * Wire format per waiter: THREAD_NOTIFY_RESPONSE byte, oid (unsigned int),
+ * version (truncated to unsigned short), threadid (unsigned int).
+ *
+ * Returns 0 when every waiter was reached (including the empty-list case),
+ * -1 when at least one connect() failed or a socket could not be created.
+ * The original returned an uninitialised value unless a connect failed.
+ *
+ * NOTE(review): on socket() failure the function returns immediately and
+ * the remaining list nodes stay on *head, as in the original. */
+int notifyAll(threadlist_t **head, unsigned int oid, unsigned int version) {
+  threadlist_t *ptr;
+  unsigned int mid;
+  struct sockaddr_in remoteAddr;
+  char msg[1 + sizeof(unsigned short) + 2*sizeof(unsigned int)];
+  int sock, size;
+  int status = 0;
+  unsigned short ver = (unsigned short)version;
+
+  while(*head != NULL) {
+    ptr = *head;
+    mid = ptr->mid;
+    //create a socket connection to that machine
+    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+      perror("notifyAll():socket()");
+      return -1;
+    }
+
+    bzero(&remoteAddr, sizeof(remoteAddr));
+    remoteAddr.sin_family = AF_INET;
+    remoteAddr.sin_port = htons(LISTEN_PORT);
+    remoteAddr.sin_addr.s_addr = htonl(mid);
+    //send Thread Notify response and threadid to that machine
+    if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
+      printf("notifyAll():error %d connecting to %s:%d\n", errno,
+             inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
+      fflush(stdout);
+      status = -1;
+    } else {
+      /* Fields are not naturally aligned inside msg, so copy them in with
+       * memcpy rather than storing through casted pointers */
+      bzero(msg, sizeof(msg));
+      msg[0] = THREAD_NOTIFY_RESPONSE;
+      size = 1;
+      memcpy(msg + size, &oid, sizeof(unsigned int));
+      size += sizeof(unsigned int);
+      memcpy(msg + size, &ver, sizeof(unsigned short));
+      size += sizeof(unsigned short);
+      memcpy(msg + size, &(ptr->threadid), sizeof(unsigned int));
+      size += sizeof(unsigned int);
+
+      send_data(sock, msg, size);
+    }
+    //close socket
+    close(sock);
+    // Update head
+    *head = ptr->next;
+    free(ptr);
+  }
+  return status;
+}
+
+/* Tears down the calling thread's transaction state on abort.
+ * With ABORTREADERS defined, removetransactionhash() is called first; then
+ * the transaction object store t_cache is deleted via objstrDelete() and
+ * t_chashDelete() is called. */
+void transAbort() {
+#ifdef ABORTREADERS
+ removetransactionhash();
+#endif
+ objstrDelete(t_cache);
+ t_chashDelete();
+}
+
+/* Records one object in the per-machine pile list and returns the (possibly
+ * new) list head.
+ *
+ * pile       : current head of the pile list, may be NULL
+ * headeraddr : header of the object being recorded
+ * mid        : machine the object is filed under
+ * num_objs   : passed to pCreate() when a new pile node is needed
+ *
+ * Depending on the header's status bits the object is filed as created
+ * (NEW), modified (DIRTY) or read (neither). Created and modified objects
+ * add sizeof(objheader_t) plus the object size to the node's sum_bytes;
+ * read objects are stored as packed (oid, version) pairs in objread.
+ * The header's status flags are cleared before returning.
+ *
+ * Returns NULL when a new node is required and pCreate() fails.
+ *
+ * The original carried disabled (commented-out) code that re-inserted NEW
+ * objects under their backup machine; it was never executed and is omitted. */
+plistnode_t *pInsert(plistnode_t *pile, objheader_t *headeraddr, unsigned int mid, int num_objs) {
+  plistnode_t *node;
+  int objsize;
+  int off = 0;
+
+  /* Find the pile that already belongs to this machine, if any */
+  for (node = pile; node != NULL; node = node->next) {
+    if (node->mid == mid)
+      break;
+  }
+
+  if (node != NULL) {
+    /* Existing machine: append to the matching category */
+    if (STATUS(headeraddr) & NEW) {
+      node->oidcreated[node->numcreated] = OID(headeraddr);
+      node->numcreated++;
+      GETSIZE(objsize, headeraddr);
+      node->sum_bytes += sizeof(objheader_t) + objsize;
+    } else if (STATUS(headeraddr) & DIRTY) {
+      node->oidmod[node->nummod] = OID(headeraddr);
+      node->nummod++;
+      GETSIZE(objsize, headeraddr);
+      node->sum_bytes += sizeof(objheader_t) + objsize;
+    } else {
+      /* Packed records: unsigned int oid followed by short version */
+      off = (sizeof(unsigned int) + sizeof(short)) * node->numread;
+      *((unsigned int *)(((char *)node->objread) + off)) = OID(headeraddr);
+      off += sizeof(unsigned int);
+      *((short *)(((char *)node->objread) + off)) = headeraddr->version;
+      node->numread++;
+    }
+  } else {
+    /* First object for this machine: create a node and push it on the front */
+    if ((node = pCreate(num_objs)) == NULL) {
+      return NULL;
+    }
+    node->mid = mid;
+    if (STATUS(headeraddr) & NEW) {
+      node->oidcreated[node->numcreated] = OID(headeraddr);
+      node->numcreated++;
+      GETSIZE(objsize, headeraddr);
+      node->sum_bytes += sizeof(objheader_t) + objsize;
+    } else if (STATUS(headeraddr) & DIRTY) {
+      node->oidmod[node->nummod] = OID(headeraddr);
+      node->nummod++;
+      GETSIZE(objsize, headeraddr);
+      node->sum_bytes += sizeof(objheader_t) + objsize;
+    } else {
+      /* The first read record always goes at the start of objread */
+      *((unsigned int *)node->objread) = OID(headeraddr);
+      off = sizeof(unsigned int);
+      *((short *)(((char *)node->objread) + off)) = headeraddr->version;
+      node->numread++;
+    }
+    node->next = pile;
+    pile = node;
+  }
+
+  /* Clear Flags */
+  STATUS(headeraddr) = 0;
+
+  return pile;
+}
+
+/* Moves the pile belonging to the local machine (mid == myIpAddr) to the end
+ * of the list so that it is processed last, and returns the new list head.
+ *
+ * Only the first matching node is moved. A NULL list, a list without a
+ * local pile, or a list whose local pile is already last is returned
+ * unchanged.
+ *
+ * The original decided "node is the head" by testing prev == pileptr, which
+ * is also true for the second node; when the local pile was second in a
+ * longer list the first node was dropped from the returned list. The
+ * predecessor is now tracked explicitly (NULL means the node is the head). */
+plistnode_t *sortPiles(plistnode_t *pileptr) {
+  plistnode_t *tail, *prev, *ptr;
+
+  if (pileptr == NULL)
+    return NULL;
+
+  /* Get tail pointer */
+  tail = pileptr;
+  while (tail->next != NULL)
+    tail = tail->next;
+
+  /* Arrange local machine processing at the end of the pile list */
+  prev = NULL;
+  for (ptr = pileptr; ptr != tail; prev = ptr, ptr = ptr->next) {
+    if (ptr->mid != myIpAddr)
+      continue;
+
+    /* Unlink ptr from its current position ... */
+    if (prev == NULL)
+      pileptr = ptr->next;
+    else
+      prev->next = ptr->next;
+
+    /* ... and append it after the old tail */
+    ptr->next = NULL;
+    tail->next = ptr;
+    return pileptr;
+  }
+
+  return pileptr;
+}
+
+/* Paxos algorithm driver:
+ * Executes when the known leader has failed and runs the prepare, accept
+ * and learn phases to pick the next leader among the live hosts.
+ *
+ * Records the current leader in origleader, then repeats prepare/accept
+ * until both succeed (learn is then run and 1 is returned) or until, after
+ * sleeping WAIT_TIME, paxosRound is seen to differ from its value on entry.
+ * Returns the result of the last phase executed (1 or -1). */
+int paxos()
+{
+  const int startRound = paxosRound;
+  int result = -1;
+
+  origleader = leader;
+#ifdef DEBUG
+  printf(">> Debug : Starting paxos..\n");
+#endif
+
+  for (;;) {
+    result = paxosPrepare();     // phase 1
+    if (result == 1) {
+      result = paxosAccept();    // phase 2
+    }
+    if (result == 1) {
+      paxosLearn();              // phase 3
+      break;
+    }
+    // Paxos not successful; wait and retry if new leader is not yet selected
+    sleep(WAIT_TIME);
+    if (paxosRound != startRound) {
+      break;
+    }
+    if (result != -1) {
+      break;
+    }
+  }
+
+#ifdef DEBUG
+  printf("\n>> Debug : Leader : [%s]\n", midtoIPString(leader));
+#endif
+
+  return result;
+}
+
+/* Paxos phase 1 (prepare).
+ *
+ * Sets my_n to n_h + 1 and proposes this machine (myIpAddr) as the value in
+ * temp_v_a. Sends PAXOS_PREPARE with my_n to every host marked live and
+ * counts PAXOS_PREPARE_OK replies. Each OK reply carries (remote_n,
+ * remote_v); the value with the highest remote_n that is not the failed
+ * leader (origleader) replaces the proposal in temp_v_a.
+ *
+ * Hosts for which no socket can be obtained, or whose reply times out
+ * (timeoutFlag), are skipped.
+ *
+ * Returns 1 when the OK count is at least numLiveHostsInSystem / 2,
+ * otherwise -1. */
+int paxosPrepare()
+{
+  char msgType;
+  int peer_n;
+  int peer_v;
+  int best_n = -1;   /* highest proposal number seen in an OK reply */
+  int okCount = 0;
+  int sock;
+  int h;
+
+  temp_v_a = v_a;
+  my_n = n_h + 1;
+
+#ifdef DEBUG
+  printf("[Prepare]...\n");
+#endif
+
+  temp_v_a = myIpAddr;  // if no other value is proposed, make this machine the new leader
+
+  for (h = 0; h < numHostsInSystem; ++h) {
+    msgType = PAXOS_PREPARE;
+    if (!liveHosts[h]) {
+      continue;
+    }
+
+    sock = getSock2WithLock(transRequestSockPool, hostIpAddrs[h]);
+    if (sock < 0) {
+      printf("paxosPrepare(): socket create error\n");
+      continue;
+    }
+#ifdef DEBUG
+    printf("%s-> Send PAXOS_PREPARE to mid [%s] with my_n=%d\n", __func__, midtoIPString(hostIpAddrs[h]), my_n);
+#endif
+    send_data(sock, &msgType, sizeof(char));
+    send_data(sock, &my_n, sizeof(int));
+    recv_data(sock, &msgType, sizeof(char));
+    if ((sock == -1) || (timeoutFlag == 1)) {
+#ifdef DEBUG
+      printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[h]));
+#endif
+      timeoutFlag = 0;
+      continue;
+    }
+
+    /* PAXOS_PREPARE_REJECT and anything else is ignored */
+    if (msgType != PAXOS_PREPARE_OK) {
+      continue;
+    }
+
+    okCount++;
+    recv_data(sock, &peer_n, sizeof(int));
+    recv_data(sock, &peer_v, sizeof(int));
+#ifdef DEBUG
+    printf("%s-> Received PAXOS_PREPARE_OK from mindex [%d] with remote_v=%s\n", __func__, h, midtoIPString(peer_v));
+#endif
+    /* Adopt the highest-numbered value, never the failed leader */
+    if (peer_v != origleader && peer_n > best_n) {
+      best_n = peer_n;
+      temp_v_a = peer_v;
+    }
+  }
+
+#ifdef DEBUG
+  printf("%s-> cnt:%d, numLiveHostsInSystem:%d\n", __func__, okCount, numLiveHostsInSystem);
+#endif
+
+  // majority of OK replies
+  return (okCount >= (numLiveHostsInSystem / 2)) ? 1 : -1;
+}
+
+/* Paxos phase 2 (accept).
+ *
+ * Sends PAXOS_ACCEPT together with my_n and the value chosen in phase 1
+ * (temp_v_a) to every host marked live, and counts PAXOS_ACCEPT_OK replies.
+ * Hosts for which no socket can be obtained, or whose reply times out
+ * (timeoutFlag), are skipped.
+ *
+ * Returns 1 when the OK count is at least numLiveHostsInSystem / 2,
+ * otherwise -1. */
+int paxosAccept()
+{
+  char msgType;
+  int h;
+  int okCount = 0;
+  int sock;
+  int proposed_v = temp_v_a;
+
+#ifdef DEBUG
+  printf("[Accept]...\n");
+#endif
+
+  for (h = 0; h < numHostsInSystem; ++h) {
+    msgType = PAXOS_ACCEPT;
+    if (!liveHosts[h]) {
+      continue;
+    }
+
+    sock = getSock2WithLock(transRequestSockPool, hostIpAddrs[h]);
+    if (sock < 0) {
+      printf("paxosAccept(): socket create error\n");
+      continue;
+    }
+
+    send_data(sock, &msgType, sizeof(char));
+    send_data(sock, &my_n, sizeof(int));
+    send_data(sock, &proposed_v, sizeof(int));
+
+    recv_data(sock, &msgType, sizeof(char));
+    if ((sock == -1) || (timeoutFlag == 1)) {
+#ifdef DEBUG
+      printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[h]));
+#endif
+      timeoutFlag = 0;
+      continue;
+    }
+
+    /* Only an explicit OK counts; PAXOS_ACCEPT_REJECT is ignored */
+    if (msgType == PAXOS_ACCEPT_OK) {
+      okCount++;
+    }
+#ifdef DEBUG
+    printf(">> Debug : Accept - n_h [%d], n_a [%d], v_a [%s]\n", n_h, n_a, midtoIPString(v_a));
+#endif
+  }
+
+  return (okCount >= (numLiveHostsInSystem / 2)) ? 1 : -1;
+}
+
+/* Paxos phase 3 (learn).
+ *
+ * Walks every host marked live. For the local machine it installs v_a as
+ * the leader and increments paxosRound; for every other host it sends
+ * PAXOS_LEARN followed by v_a. Hosts for which no socket can be obtained
+ * are silently skipped, and no reply is read. */
+void paxosLearn()
+{
+  char msgType = PAXOS_LEARN;
+  int sock;
+  int h;
+
+#ifdef DEBUG
+  printf("[Learn]...\n");
+#endif
+
+  for (h = 0; h < numHostsInSystem; ++h) {
+    if (!liveHosts[h]) {
+      continue;
+    }
+
+    if (hostIpAddrs[h] != myIpAddr) {
+      /* Remote host: announce the chosen value */
+      sock = getSock2WithLock(transRequestSockPool, hostIpAddrs[h]);
+      if (sock >= 0) {
+        send_data(sock, &msgType, sizeof(char));
+        send_data(sock, &v_a, sizeof(int));
+      }
+      continue;
+    }
+
+    /* Local host: adopt the value directly */
+    leader = v_a;
+    paxosRound++;
+#ifdef DEBUG
+    printf("This is my leader!!!: [%s]\n", midtoIPString(leader));
+#endif
+  }
+}
--- /dev/null
+/* Declarations of the per-thread (__thread) transaction state that is
+ * defined in another translation unit. */
+#ifndef TRANS_H
+#define TRANS_H
+
+/* Per-thread transaction object store (passed to objstrDelete() on abort) */
+extern __thread objstr_t *t_cache;
+/* Per-thread list of objects; presumably those to revert on abort -- TODO confirm */
+extern __thread struct ___Object___ *revertlist;
+#ifdef ABORTREADERS
+/* Per-thread abort flag; its address is registered in the reader lists so
+ * that it can be set to 1 through the stored pointer */
+extern __thread int t_abort;
+/* Per-thread jump buffer; presumably the longjmp target used to abort a
+ * transaction -- TODO confirm */
+extern __thread jmp_buf aborttrans;
+#endif
+
+#endif