create new Genome directory and add files

author adash <adash>

Tue, 9 Jun 2009 01:31:41 +0000 (01:31 +0000)

committer adash <adash>

Tue, 9 Jun 2009 01:31:41 +0000 (01:31 +0000)
author adash <adash>
Tue, 9 Jun 2009 01:31:41 +0000 (01:31 +0000)
committer adash <adash>
Tue, 9 Jun 2009 01:31:41 +0000 (01:31 +0000)
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java b/Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java

new file mode 100644 (file)

index 0000000..f911524
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java
@@ -0,0 +1,194 @@
+public class Bitmap {
+  public int numBit;
+  public int numWord;
+  public int bits[];
+  
+  public int NUM_BIT_PER_BYTE;
+  public int NUM_BIT_PER_WORD;
+
+  
+  /* =============================================================================
+   * bitmap_alloc
+   * -- Returns NULL on failure
+   * =============================================================================
+   */
+  Bitmap(int myNumBit) {
+
+    NUM_BIT_PER_BYTE = 8;
+    NUM_BIT_PER_WORD = ((8) * NUM_BIT_PER_BYTE);
+
+    numBit = myNumBit;
+    numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD);
+
+    bits = new int[numWord];
+    
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = 0;
+    }
+  }
+
+  Bitmap(Bitmap myBitMap) {
+    NUM_BIT_PER_BYTE = 8;
+    NUM_BIT_PER_WORD = ((8) * NUM_BIT_PER_BYTE);
+
+
+    numBit = myBitMap.numBit;
+    numWord = myBitMap.numWord;
+    bits = new int[numWord];
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = myBitMap.bits[i];
+    }
+  }
+
+  /* =============================================================================
+   * bitmap_set
+   * -- Sets ith bit to 1
+   * -- Returns TRUE on success, else FALSE
+   * =============================================================================
+   */
+  boolean set (int i) {
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    bits[((int)i)/NUM_BIT_PER_WORD] |= (1 << (i % NUM_BIT_PER_WORD));
+
+    return true;
+  }
+
+
+  /* =============================================================================
+   * bitmap_clear
+   * -- Clears ith bit to 0
+   * -- Returns TRUE on success, else FALSE
+   * =============================================================================
+   */
+  boolean clear (int i) {
+      if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    bits[((int)i)/NUM_BIT_PER_WORD] &= ~(1 << (i % NUM_BIT_PER_WORD));
+
+    return true;
+  }
+
+
+  /* =============================================================================
+   * bitmap_clearAll
+   * -- Clears all bit to 0
+   * =============================================================================
+   */
+  void clearAll () {
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = 0;
+    }
+  }
+
+
+  /* =============================================================================
+   * bitmap_isSet
+   * -- Returns TRUE if ith bit is set, else FALSE
+   * =============================================================================
+   */
+  boolean isSet (int i) {
+    int tempB = (int)bits[((int)i)/NUM_BIT_PER_WORD];
+    int tempC = (1 << (((int)i) % NUM_BIT_PER_WORD));
+    boolean tempbool = ((tempB & tempC) > 0) ? true:false;
+    //tempB /*bits[((int)i)/NUM_BIT_PER_WORD]*/ & tempC /*(1 << (i % NUM_BIT_PER_WORD))*/ 
+    if ((i >= 0) && (i < (int)numBit) && tempbool) {
+        return true;
+    }
+
+    return false;
+  }
+
+
+  /* =============================================================================
+   * bitmap_findClear
+   * -- Returns index of first clear bit
+   * -- If start index is negative, will start from beginning
+   * -- If all bits are set, returns -1
+   * =============================================================================
+   */
+  int findClear (int startIndex) {
+    int i;
+    boolean tempbool = ((bits[((int)i)/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD))) > 0) ? true:false;
+    for (i = MAX(startIndex, 0); i < numBit; i++) {
+        if (!tempbool) {
+            return i;
+        }
+    }
+
+    return -1;
+  }
+
+
+  /* =============================================================================
+   * bitmap_findSet
+   * -- Returns index of first set bit
+   * -- If all bits are clear, returns -1
+   * =============================================================================
+   */
+  int findSet (int startIndex) {
+    int i;
+
+    for (i = MAX(startIndex, 0); i < numBit; i++) {
+      boolean tempbool = ((int)bits[((int)i)/NUM_BIT_PER_WORD] & (1 << ((int)i % NUM_BIT_PER_WORD)) > 0) ? true:false;
+        if (tempbool) {
+            return i;
+        }
+    }
+
+    return -1;
+  }
+
+
+  /* =============================================================================
+   * bitmap_getNumClear
+   * =============================================================================
+   */
+  int getNumClear () {
+    return (numBit - getNumSet());
+  }
+
+
+  /* =============================================================================
+   * bitmap_getNumSet
+   * =============================================================================
+   */
+  int getNumSet () {
+    int i;
+    int count = 0;
+    for (i = 0; i < numBit; i++) {
+        boolean tempbool = ((int)bits[((int)i)/NUM_BIT_PER_WORD] & (1 << ((int)i % NUM_BIT_PER_WORD)) > 0) ? true:false;
+        if (tempbool) {
+            count++;
+        }
+    }
+
+    return count;
+  }
+
+  /* =============================================================================
+   * bitmap_toggleAll
+   * =============================================================================
+   */
+  void toggleAll () {
+    int w;
+    for (w = 0; w < numWord; w++) {
+      bits[w] ^= -1;
+    }
+  }
+
+  int DIVIDE_AND_ROUND_UP(int a, int b) {
+    return (a/b) + (((a % b) > 0) ? (1) : (0));
+  }
+
+  int MAX(int a, int b) {
+    return (a > b) ? a : b; 
+  }  
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Gene.java b/Robust/src/Benchmarks/SingleTM/Genome/Gene.java

new file mode 100644 (file)

index 0000000..c69b388
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Gene.java
@@ -0,0 +1,37 @@
+public class Gene {
+  public int length;
+  public String contents;
+  public Bitmap startBitmapPtr; /* used for creating segments */
+  
+  Gene(int myLength) {
+    length = myLength;
+    contents = "";
+    startBitmapPtr = new Bitmap(length);
+  }
+
+
+/* =============================================================================
+ * gene_create
+ * -- Populate contents with random gene
+ * =============================================================================
+ */
+  void create (Random randomObj) {
+    int i;
+    char[] nucleotides = new char[4];
+    char[] arrayContents = new char[length];
+    nucleotides[0] = 'a';
+    nucleotides[1] = 'c';
+    nucleotides[2] = 'g';
+    nucleotides[3] = 't';
+
+    for (i = 0; i < length; i++) {
+      int legitimateNumber = (int)randomObj.random_generate(); 
+      if(legitimateNumber < 0) {
+        legitimateNumber *= -1;
+      }
+      arrayContents[i] = nucleotides[legitimateNumber % 4];
+    }
+    
+    contents = new String(arrayContents);
+  }  
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Genome.java b/Robust/src/Benchmarks/SingleTM/Genome/Genome.java

new file mode 100644 (file)

index 0000000..4fa2600
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Genome.java
@@ -0,0 +1,131 @@
+public class Genome extends Thread {
+  int geneLength;
+  int segmentLength;
+  int minNumSegment;
+  int numThread;
+
+  int threadid;
+
+  // add segments, random, etc to member variables
+  // include in constructor
+  // allows for passing in thread run function
+  Random randomPtr;
+  Gene genePtr;
+  Segments segmentsPtr;
+  Sequencer sequencerPtr;
+
+  Genome(String x[]) {
+    parseCmdLine(x);
+    if(numThread == 0) {
+      numThread = 1;
+    }
+
+    randomPtr = new Random();
+    randomPtr.random_alloc();
+    randomPtr.random_seed(0);
+
+    genePtr = new Gene(geneLength);
+    genePtr.create(randomPtr);
+
+    segmentsPtr = new Segments(segmentLength, minNumSegment);
+    segmentsPtr.create(genePtr, randomPtr);
+
+    sequencerPtr = new Sequencer(geneLength, segmentLength, segmentsPtr);
+  }
+
+  Genome(int myThreadid, int myGeneLength, int mySegLength, int myMinNumSegs, int myNumThread, Random myRandomPtr, Gene myGenePtr, Segments mySegmentsPtr, Sequencer mySequencerPtr) {
+    threadid = myThreadid;
+    geneLength = myGeneLength;
+    segmentLength = mySegLength;
+    minNumSegment = myMinNumSegs;
+    numThread = myNumThread;
+
+    randomPtr = myRandomPtr;
+    genePtr = myGenePtr;
+    segmentsPtr = mySegmentsPtr;
+    sequencerPtr = mySequencerPtr;
+  }
+
+  public void parseCmdLine(String args[]) {
+    int i = 0;
+    String arg;
+    while (i < args.length && args[i].startsWith("-")) {
+      arg = args[i++];
+      //check options
+      if(arg.equals("-g")) {
+        if(i < args.length) {
+          this.geneLength = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-s")) {
+        if(i < args.length) {
+          this.segmentLength = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-n")) {
+        if(i < args.length) {
+          this.minNumSegment = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-t")) {
+        if(i < args.length) {
+          this.numThread = new Integer(args[i++]).intValue();
+        }
+      } 
+    }
+
+  }
+
+  public void run() {
+    Barrier.enterBarrier();
+    Sequencer.run(threadid, numThread, randomPtr, sequencerPtr); 
+    Barrier.enterBarrier();
+  }
+
+  public static void main(String x[]){
+
+    System.out.print("Creating gene and segments... ");
+    Genome g = new Genome(x);
+
+    System.out.println("done.");
+    System.out.println("Gene length     = " + g.genePtr.length);
+    System.out.println("Segment length  = " + g.segmentsPtr.length);
+    System.out.println("Number segments = " + g.segmentsPtr.contentsPtr.size());
+    System.out.println("Number threads  = " + g.numThread);
+
+
+    Barrier.setBarrier((int)g.numThread);
+
+    /* Create and Start Threads */
+
+    String gene = g.genePtr.contents;
+    Genome[] gn = new Genome[g.numThread];
+
+    for(int i = 1; i<g.numThread; i++) {
+      gn[i] = new Genome(i, g.geneLength, g.segmentLength, g.minNumSegment, g.numThread, g.randomPtr, g.genePtr, g.segmentsPtr, g.sequencerPtr);
+    }
+
+    System.out.print("Sequencing gene... ");    
+
+    for(int i = 1; i<g.numThread; i++) {
+      gn[i].start();
+    }
+
+    Barrier.enterBarrier();
+    Sequencer.run(0, g.numThread, g.randomPtr, g.sequencerPtr); 
+    Barrier.enterBarrier();
+
+
+    System.out.println("done.");
+
+    /* Check result */
+    {
+      String sequence = g.sequencerPtr.sequence;
+      boolean result = (gene.compareTo(sequence) == 0) ? true:false;
+      System.out.println("Sequence matches gene: " + (result ? "yes" : "no"));
+      //DEBUG
+      //if (result) {
+      // System.out.println("gene     = " + gene);
+      // System.out.println("sequence = " + sequence);
+      //}
+    }
+
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java b/Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java

new file mode 100644 (file)

index 0000000..449e0e4
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java
@@ -0,0 +1,65 @@
+public class Hashtable {
+    List buckets[];
+    int numBucket;
+    int size;
+    int resizeRatio;
+    int growthFactor;
+    
+    
+    public Hashtable (int initNumBucket, int resizeRatio, int growthFactor) {
+
+      allocBuckets(initNumBucket);
+      numBucket = initNumBucket;
+      size = 0;
+      resizeRatio = ((resizeRatio < 0) ? 3 : resizeRatio);
+      growthFactor = ((growthFactor < 0) ? 3 : growthFactor);
+    }
+    
+    public boolean TMhashtable_insert (String keyPtr, String dataPtr) {
+      int i = hashSegment(keyPtr) % numBucket;
+
+      Pair findPair = new Pair();
+      findPair.firstPtr = keyPtr;
+      Pair pairPtr = buckets[(int)i].find(findPair);
+      if (pairPtr != null) {
+          return false;
+      }
+
+      Pair insertPtr = new Pair(keyPtr, dataPtr);
+
+      /* Add new entry  */
+      if (buckets[(int)i].insert(insertPtr) == false) {
+          return false;
+      }
+
+      size++;
+
+      return true;
+    }
+    
+    void allocBuckets (int numBucket) {
+      int i;
+      /* Allocate bucket: extra bucket is dummy for easier iterator code */
+      buckets = new List[numBucket+1];
+      
+      for (i = 0; i < (numBucket + 1); i++) {
+          List chainPtr = new List();
+          buckets[(int)i] = chainPtr;
+      }
+    }
+    
+    int hashSegment (String str) {
+      int hash = 0;
+
+      int index = 0;
+      /* Note: Do not change this hashing scheme */
+      for(index = 0; index < str.length(); index++) {
+        char c = str.charAt(index);
+        hash = c + (hash << 6) + (hash << 16) - hash;
+      }
+  
+      if(hash < 0) hash *= -1;
+
+      return hash;
+    }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/List.java b/Robust/src/Benchmarks/SingleTM/Genome/List.java

new file mode 100644 (file)

index 0000000..68386a8
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/List.java
@@ -0,0 +1,66 @@
+public class List {
+  ListNode head;
+  int size;
+
+  public List () {
+    head = new ListNode();
+    head.dataPtr = null;
+    head.nextPtr = null;
+    size = 0;
+  }
+
+  Pair find (Pair dataPtr) {
+    ListNode nodePtr;
+    ListNode prevPtr = findPrevious(dataPtr);
+
+    nodePtr = prevPtr.nextPtr;
+
+    if ((nodePtr == null) || (compareSegment(nodePtr.dataPtr, dataPtr) != 0)) {
+      return null;
+    }
+
+    return (nodePtr.dataPtr);
+  }
+
+  ListNode findPrevious (Pair dataPtr) {
+    ListNode prevPtr = head;
+    ListNode nodePtr;
+    nodePtr = prevPtr.nextPtr;
+
+    for (; nodePtr != null; nodePtr = nodePtr.nextPtr) {
+      if (compareSegment(nodePtr.dataPtr, dataPtr) >= 0) {
+        return prevPtr;
+      }
+      prevPtr = nodePtr;
+    }
+
+    return prevPtr;
+  }
+
+  boolean insert (Pair dataPtr) {
+    ListNode prevPtr;
+    ListNode nodePtr;
+    ListNode currPtr;
+
+    prevPtr = findPrevious(dataPtr);
+    currPtr = prevPtr.nextPtr;
+
+    if ((currPtr != null) && (compareSegment((Pair)currPtr.dataPtr, (Pair)dataPtr) == 0)) {
+      return false;
+    }
+
+    nodePtr = new ListNode(dataPtr);
+
+    nodePtr.nextPtr = currPtr;
+    prevPtr.nextPtr = nodePtr;
+    size++;
+
+    return true;
+  }
+
+  int compareSegment (Pair a, Pair b) { 
+    String aString = a.firstPtr;
+    String bString = b.firstPtr;
+    return aString.compareTo(bString);
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/ListNode.java b/Robust/src/Benchmarks/SingleTM/Genome/ListNode.java

new file mode 100644 (file)

index 0000000..ee48380
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/ListNode.java
@@ -0,0 +1,14 @@
+public class ListNode {
+    Pair dataPtr;
+    ListNode nextPtr;
+    
+    public ListNode () {
+      dataPtr = null;
+      nextPtr = null;
+    }
+    
+    public ListNode (Pair myDataPtr) {
+      dataPtr = myDataPtr;
+      nextPtr = null;
+    } 
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Pair.java b/Robust/src/Benchmarks/SingleTM/Genome/Pair.java

new file mode 100644 (file)

index 0000000..89886bd
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Pair.java
@@ -0,0 +1,14 @@
+public class Pair {
+    String firstPtr;
+    String secondPtr;
+    
+    public Pair() {
+      firstPtr = null;
+      secondPtr = null;
+    }
+    
+    public Pair(String myFirstPtr, String mySecondPtr) { 
+      firstPtr = myFirstPtr;
+      secondPtr = mySecondPtr;
+    }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Segments.java b/Robust/src/Benchmarks/SingleTM/Genome/Segments.java

new file mode 100644 (file)

index 0000000..de1495b
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Segments.java
@@ -0,0 +1,70 @@
+public class Segments {
+  public int length;
+  public int minNum;
+  Vector contentsPtr;
+  /* private: */
+  String strings[];
+
+  Segments (int myLength, int myMinNum) {
+    minNum = myMinNum;
+    length = myLength;
+
+    strings = new String[(int)minNum];
+    contentsPtr = new Vector((int)minNum);
+  }
+
+
+  /* =============================================================================
+   * segments_create
+   * -- Populates 'contentsPtr'
+   * =============================================================================
+   */
+  void create (Gene genePtr, Random randomPtr) {
+    String geneString;
+    int geneLength;
+    Bitmap startBitmapPtr;
+    int numStart;
+    int i;
+    int maxZeroRunLength;
+
+    geneString = genePtr.contents;
+    geneLength = genePtr.length;
+    startBitmapPtr = genePtr.startBitmapPtr;
+    numStart = geneLength - length + 1;
+
+    /* Pick some random segments to start */
+    for (i = 0; i < minNum; i++) {
+      int j = (int)(randomPtr.random_generate() % numStart);
+      boolean status = startBitmapPtr.set(j);
+      strings[i] = geneString.substring((int)j, (int)(j+length)); // WRITE SUBSTRING FUNCTION
+      contentsPtr.addElement(strings[i]);
+    }
+
+    /* Make sure segment covers start */
+    i = 0;
+    if (!startBitmapPtr.isSet(i)) {
+      String string;
+      string = geneString.subString((int)i, (int)(i+length)); // USE BYTE SUBSTRING FUNCTION
+      contentsPtr.addElement(string);
+      startBitmapPtr.set(i);
+    }
+
+    /* Add extra segments to fill holes and ensure overlap */
+    maxZeroRunLength = length - 1;
+    for (i = 0; i < numStart; i++) {
+      int i_stop = Math.imin((i+maxZeroRunLength), numStart);
+      for ( /* continue */; i < i_stop; i++) {
+        if (startBitmapPtr.isSet(i)) {
+          break;
+        }
+      }
+      if (i == i_stop) {
+        /* Found big enough hole */
+        i = i - 1;
+        String string = geneString.subString((int)i, (int)(i+length)); // USE BYTE SUBSTRING FUNCTION
+        contentsPtr.addElement(string);
+        startBitmapPtr.set(i);
+      }
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java b/Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java

new file mode 100644 (file)

index 0000000..6540734
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java
@@ -0,0 +1,400 @@
+public class Sequencer {
+  /* Holds the tables and entry arrays that run() uses to rebuild 'sequence' from the segments */
+  public String sequence;
+
+  public Segments segmentsPtr;
+
+  /* For removing duplicate segments */
+  Hashtable uniqueSegmentsPtr;
+
+  /* For matching segments */
+  endInfoEntry endInfoEntries[];
+  Table startHashToConstructEntryTables[];
+
+  /* For constructing sequence */
+  constructEntry constructEntries[];
+  Table hashToConstructEntryTable;
+
+  /* For deallocation */
+  int segmentLength;
+
+
+  /* =============================================================================
+   * sequencer_alloc
+   * -- Allocates the hash tables and the per-segment entry arrays read by run()
+   * =============================================================================
+   */
+  public Sequencer (int myGeneLength, int mySegmentLength, Segments mySegmentsPtr) { 
+
+    int maxNumUniqueSegment = myGeneLength - mySegmentLength + 1;
+    int i;
+
+    uniqueSegmentsPtr = new Hashtable((int)myGeneLength, -1, -1);
+
+    /* For finding a matching entry */
+    endInfoEntries = new endInfoEntry[maxNumUniqueSegment];
+    for (i = 0; i < maxNumUniqueSegment; i++) {
+      endInfoEntries[i] = new endInfoEntry(true, 1);
+    }
+
+    startHashToConstructEntryTables = new Table[mySegmentLength];
+    for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */
+      startHashToConstructEntryTables[i] = new Table(myGeneLength);
+    }
+    segmentLength = mySegmentLength;
+
+    /* For constructing sequence */
+    constructEntries = new constructEntry[maxNumUniqueSegment];
+
+    for (i= 0; i < maxNumUniqueSegment; i++) {
+      constructEntries[i] = new constructEntry(null, true, 0, null, null, null, 0, segmentLength);
+    }
+    hashToConstructEntryTable = new Table(myGeneLength);
+
+    segmentsPtr = mySegmentsPtr;  
+  }
+
+
+  /* =============================================================================
+   * sequencer_run
+   * -- Called by each thread with its own threadNum; only thread 0 runs steps 2c and 3
+   * =============================================================================
+   */
+  public static void run (int threadNum, int numOfThreads, Random randomPtr, Sequencer sequencerPtr) {
+
+    int threadId = threadNum;
+
+    Segments segmentsPtr = sequencerPtr.segmentsPtr;
+
+    Hashtable         uniqueSegmentsPtr = sequencerPtr.uniqueSegmentsPtr;
+    endInfoEntry    endInfoEntries[] = sequencerPtr.endInfoEntries;
+    Table         startHashToConstructEntryTables[] = sequencerPtr.startHashToConstructEntryTables;
+    constructEntry  constructEntries[] = sequencerPtr.constructEntries;
+    Table         hashToConstructEntryTable = sequencerPtr.hashToConstructEntryTable;
+
+    Vector      segmentsContentsPtr = segmentsPtr.contentsPtr;
+    int        numSegment          = segmentsContentsPtr.size();
+    int        segmentLength       = segmentsPtr.length;
+
+    int i;
+    int j;
+    int i_start;
+    int i_stop;
+    int numUniqueSegment;
+    int substringLength;
+    int entryIndex;
+
+    int CHUNK_STEP1 = 12; /* upper bound on segments inserted per atomic block in step 1 */
+
+    /*
+     * Step 1: Remove duplicate segments
+     */
+    int numThread = numOfThreads;
+    {
+      /* Choose disjoint segments [i_start,i_stop) for each thread */
+      int partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
+      i_start = threadId * partitionSize;
+      if (threadId == (numThread - 1)) {
+        i_stop = numSegment;
+      } else {
+        i_stop = i_start + partitionSize;
+      }
+    }
+
+    for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
+      atomic {
+        int ii;
+        int ii_stop = Math.imin(i_stop, (i+CHUNK_STEP1));
+        for (ii = i; ii < ii_stop; ii++) {
+          String segment = (String)segmentsContentsPtr.elementAt(ii);
+          if(!uniqueSegmentsPtr.TMhashtable_insert(segment, segment)) {
+            ; /* insert returned false (e.g. segment already present): nothing to do */
+          }
+        } /* ii */
+      }
+    }
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 2a: Iterate over unique segments and compute hashes.
+     *
+     * For the gene "atcg", the hashes for the end would be:
+     *
+     *     "t", "tc", and "tcg"
+     *
+     * And for the gene "tcgg", the hashes for the start would be:
+     *
+     *    "t", "tc", and "tcg"
+     *
+     * The names are "end" and "start" because if a matching pair is found,
+     * they are the substring of the end part of the pair and the start
+     * part of the pair respectively. In the above example, "tcg" is the
+     * matching substring so:
+     *
+     *     (end)    (start)
+     *     a[tcg] + [tcg]g  = a[tcg]g    (overlap = "tcg")
+     */
+
+    /* uniqueSegmentsPtr is constant now */
+    numUniqueSegment = uniqueSegmentsPtr.size;
+    entryIndex = 0;
+
+    {
+      /* Choose disjoint segments [i_start,i_stop) for each thread */
+      int num = uniqueSegmentsPtr.numBucket;
+      int partitionSize = (num + numThread/2) / numThread; /* with rounding */
+      i_start = threadId * partitionSize;
+      if (threadId == (numThread - 1)) {
+        i_stop = num;
+      } else {
+        i_stop = i_start + partitionSize;
+      }
+    }
+
+    {
+      /* Approximate disjoint segments of element allocation in constructEntries */
+      int partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+      entryIndex = threadId * partitionSize;
+    }
+
+    for (i = i_start; i < i_stop; i++) {
+      List chainPtr = uniqueSegmentsPtr.buckets[i];
+      ListNode it = chainPtr.head;
+
+      while(it.nextPtr != null) {
+        it = it.nextPtr;    
+        String segment = it.dataPtr.firstPtr;
+        int newj;
+        int startHash;
+        boolean status;
+
+        /* Find an empty constructEntries entry */
+        atomic {
+          while(constructEntries[entryIndex].segment != null) { 
+            entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
+          }
+          constructEntries[entryIndex].segment = segment;
+        }
+
+        constructEntry constructEntryPtr = constructEntries[entryIndex];
+
+        entryIndex = (entryIndex + 1) % numUniqueSegment;
+
+
+
+        /*
+         * Save hashes (sdbm algorithm) of segment substrings
+         *
+         * endHashes will be computed for shorter substrings after matches
+         * have been made (in the next phase of the code). This will reduce
+         * the number of substrings for which hashes need to be computed.
+         *
+         * Since we can compute startHashes incrementally, we go ahead
+         * and compute all of them here.
+         */
+        /* constructEntryPtr is local now */
+        constructEntryPtr.endHash = hashString(segment.substring(1)); // USE BYTE SUBSTRING FUNCTION
+
+        startHash = 0;
+        for (newj = 1; newj < segmentLength; newj++) {
+          startHash = segment.charAt((int)newj-1) + (startHash << 6) + (startHash << 16) - startHash;
+          atomic {
+            boolean check = startHashToConstructEntryTables[newj].table_insert(startHash, constructEntryPtr);
+          }
+
+        }
+
+
+        /*
+         * For looking up construct entries quickly
+         */
+        startHash = segment.charAt((int)newj-1) + (startHash << 6) + (startHash << 16) - startHash;
+        atomic {
+          hashToConstructEntryTable.table_insert(startHash, constructEntryPtr);
+        }
+      }
+    }
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 2b: Match ends to starts by using hash-based string comparison.
+     */
+    for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {
+
+      Table startHashToConstructEntryTablePtr = startHashToConstructEntryTables[substringLength];
+      LinkedList buckets[] = startHashToConstructEntryTablePtr.buckets;
+      int numBucket = startHashToConstructEntryTablePtr.numBucket;
+
+      int index_start;
+        int index_stop;
+
+        {
+          /* Choose disjoint segments [index_start,index_stop) for each thread */
+          int partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+          index_start = threadId * partitionSize;
+          if (threadId == (numThread - 1)) {
+            index_stop = numUniqueSegment;
+          } else {
+            index_stop = index_start + partitionSize;
+          }
+        }
+
+        /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */
+        for (entryIndex = index_start;
+            entryIndex < index_stop;
+            entryIndex += endInfoEntries[entryIndex].jumpToNext)
+        {
+          if (!endInfoEntries[entryIndex].isEnd) {
+            continue;
+          }
+
+          /*  ConstructEntries[entryIndex] is local data */
+          constructEntry endConstructEntryPtr = constructEntries[entryIndex];
+          String endSegment = endConstructEntryPtr.segment;
+          int endHash = endConstructEntryPtr.endHash;
+
+          LinkedList chainPtr = buckets[(endHash % numBucket)]; /* buckets: constant data */
+          LinkedListIterator it = (LinkedListIterator)chainPtr.iterator();
+          while (it.hasNext()) {
+            constructEntry startConstructEntryPtr = (constructEntry)it.next();
+            String startSegment = startConstructEntryPtr.segment;
+            int newLength = 0;
+
+            /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
+            atomic {
+              if(startConstructEntryPtr.isStart &&
+                  (endConstructEntryPtr.startPtr != startConstructEntryPtr) &&
+                  (startSegment.substring(0, (int)substringLength).compareTo(endSegment.substring((int)(segmentLength-substringLength))) == 0))
+              {
+                startConstructEntryPtr.isStart = false;
+                constructEntry startConstructEntry_endPtr;
+                constructEntry endConstructEntry_startPtr;
+
+                /* Update endInfo (appended something so no inter end) */
+                endInfoEntries[entryIndex].isEnd = false;
+                /* Update segment chain construct info */
+                startConstructEntry_endPtr = startConstructEntryPtr.endPtr;
+                endConstructEntry_startPtr = endConstructEntryPtr.startPtr;
+                startConstructEntry_endPtr.startPtr = endConstructEntry_startPtr;
+                endConstructEntryPtr.nextPtr = startConstructEntryPtr;
+                endConstructEntry_startPtr.endPtr = startConstructEntry_endPtr;
+                endConstructEntryPtr.overlap = substringLength;
+                newLength = endConstructEntry_startPtr.length + startConstructEntryPtr.length - substringLength;
+                endConstructEntry_startPtr.length = newLength;
+              } else {/* if (matched) */
+              }
+            }
+
+            if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */
+              break;
+            }
+          } /* iterate over chain */
+
+        } /* for (endIndex < numUniqueSegment) */
+
+        Barrier.enterBarrier();
+
+        /*
+         * Step 2c: Update jump values and hashes
+         *
+         * endHash entries of all remaining ends are updated to the next
+         * substringLength. Additionally jumpToNext entries are updated such
+         * that they allow to skip non-end entries. Currently this is sequential
+         * because parallelization did not perform better.
+         */
+
+        if (threadId == 0) {
+          if (substringLength > 1) {
+            int index = segmentLength - substringLength + 1;
+            /* initialization of j and i: with i being the next end after j=0 */
+            for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) {
+              /* find first non-null */
+              ;
+            }
+            /* entry 0 is handled separately from the loop below */
+            endInfoEntries[0].jumpToNext = i;
+            if (endInfoEntries[0].isEnd) {
+              String segment = constructEntries[0].segment;
+              constructEntries[0].endHash = hashString(segment.subString((int)index)); // USE BYTE SUBSTRING FUNCTION
+            }
+            /* Continue scanning (do not reset i) */
+            for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
+
+              if (endInfoEntries[i].isEnd) {
+                String segment = constructEntries[i].segment;
+                constructEntries[i].endHash = hashString(segment.substring((int)index)); // USE BYTE SUBSTRING FUNCTION
+                endInfoEntries[j].jumpToNext = Math.imax((int)1, (int)(i - j));
+                j = i;
+              }
+            }
+            endInfoEntries[j].jumpToNext = i - j;
+          }
+        }
+
+        Barrier.enterBarrier();
+
+    } /* for (substringLength > 0) */
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 3: Build sequence string
+     */
+    if (threadId == 0) {
+      int totalLength = 0; /* NOTE(review): accumulated below but never read afterwards */
+      for (i = 0; i < numUniqueSegment; i++) {
+        if (constructEntries[i].isStart) {
+          totalLength += constructEntries[i].length;
+        }
+      }
+
+      String sequence = sequencerPtr.sequence;
+
+      String copyPtr = sequence;
+      int sequenceLength = 0;
+
+      for (i = 0; i < numUniqueSegment; i++) {
+        /* If there are several start segments, we append in arbitrary order  */
+        constructEntry constructEntryPtr = constructEntries[i];
+        if (constructEntryPtr.isStart) {
+          int newSequenceLength = sequenceLength + constructEntryPtr.length; /* NOTE(review): unused */
+          int prevOverlap = 0;
+          do {
+            int numChar = segmentLength - constructEntryPtr.overlap; /* NOTE(review): unused */
+            copyPtr = constructEntryPtr.segment;
+            if(sequencerPtr.sequence == null) {
+              sequencerPtr.sequence = copyPtr;
+            } else {
+              sequencerPtr.sequence = sequencerPtr.sequence.concat(copyPtr.substring((int)(prevOverlap)));
+            }
+            prevOverlap = constructEntryPtr.overlap;
+            constructEntryPtr = constructEntryPtr.nextPtr;
+          } while (constructEntryPtr != null);
+        }
+      }
+    }
+  }
+
+  /* =============================================================================
+   * hashString
+   * -- uses sdbm hash function
+   * =============================================================================
+   */
+   static int hashString (String str)
+  {
+    int hash = 0;
+
+    int index = 0;
+    // Note: Do not change this hashing scheme 
+    for(index = 0; index < str.length(); index++) {
+      char c = str.charAt(index);
+      hash = c + (hash << 6) + (hash << 16) - hash;
+    }
+
+    if(hash < 0) hash *= -1;
+
+    return hash;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Table.java b/Robust/src/Benchmarks/SingleTM/Genome/Table.java

new file mode 100644 (file)

index 0000000..653728f
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Table.java
@@ -0,0 +1,59 @@
+public class Table {
+
+    LinkedList buckets[];
+    int numBucket;
+
+
+    /* =============================================================================
+     * table_alloc
+     * -- Returns NULL on failure
+     * =============================================================================
+     */
+    Table (int myNumBucket) {
+    
+      int i;
+
+      buckets = new LinkedList[myNumBucket];
+      for(i = 0; i < myNumBucket; i++) {
+        buckets[i] = new LinkedList();      
+      }
+
+      numBucket = myNumBucket;
+      
+    }
+
+
+    /* =============================================================================
+     * table_insert
+     * -- Returns TRUE if successful, else FALSE
+     * =============================================================================
+     */
+    boolean table_insert (int hash, Object dataPtr) {
+      int i = (int)(hash % numBucket);
+      if(i < 0) i *= -1;
+      if(buckets[i].contains(dataPtr)) {
+        return false;
+      }
+      buckets[i].add(dataPtr);
+      return true;
+    }
+
+    /* =============================================================================
+     * table_remove
+     * -- Returns TRUE if successful, else FALSE
+     * =============================================================================
+     */
+    boolean table_remove (int hash, Object dataPtr) {
+    
+      int i = (int)(hash % numBucket);
+      boolean tempbool = buckets[i].contains(dataPtr);
+      if (tempbool) {
+          buckets[i].remove(dataPtr);
+          return true;
+      }
+
+      return false;
+    
+    }
+
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java b/Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java

new file mode 100644 (file)

index 0000000..e0c17ec
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java
@@ -0,0 +1,25 @@
+public class constructEntry {
+    boolean isStart;
+    String segment;
+    int endHash;
+    constructEntry startPtr;
+    constructEntry nextPtr;
+    constructEntry endPtr;
+    int overlap;
+    int length;
+      
+    constructEntry(String mySegment, boolean myStart, int myEndHash, constructEntry myStartPtr, constructEntry myNextPtr, constructEntry myEndPtr, int myOverlap, int myLength) {
+      segment = mySegment;
+      isStart = myStart;
+      endHash = myEndHash;
+      startPtr = this;
+      nextPtr = myNextPtr;
+      endPtr = this;
+      overlap = myOverlap;
+      length = myLength;
+    }
+    
+    boolean equals(constructEntry copy) {
+      return ((segment.compareTo(copy.segment) == 0) && (isStart == copy.isStart) && (endHash == copy.endHash) && (startPtr == copy.startPtr) && (nextPtr == copy.nextPtr) && (endPtr == copy.endPtr) && (overlap == copy.overlap) && (length == copy.length));
+    }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java b/Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java

new file mode 100644 (file)

index 0000000..3827a0e
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java
@@ -0,0 +1,13 @@
+  public class endInfoEntry {
+      boolean isEnd;
+      int jumpToNext;
+      
+      public endInfoEntry() {
+        isEnd = false;
+        jumpToNext = 0;
+      }
+      public endInfoEntry(boolean myEnd, int myNext) {
+        isEnd = myEnd;
+        jumpToNext = myNext;
+      }
+  }
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/makefile b/Robust/src/Benchmarks/SingleTM/Genome/makefile

new file mode 100644 (file)

index 0000000..8f1f2e0
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/makefile
@@ -0,0 +1,20 @@
+# Build rules for the Genome benchmark.
+# MAINCLASS names both the main class and the output passed to buildscript.
+MAINCLASS=Genome
+# Sources handed to buildscript.
+SRC=${MAINCLASS}.java \
+       ../common/Random.java \
+       Bitmap.java \
+       Gene.java \
+       Segments.java \
+       endInfoEntry.java \
+       constructEntry.java \
+       ../../../ClassLibrary/JavaSTM/Barrier.java \
+       Sequencer.java \
+       Table.java \
+       Hashtable.java
+# Options handed to buildscript.
+FLAGS=-mainclass ${MAINCLASS} -singleTM -optimize -dcopts -abcclose -fastmemcpy -joptimize
+
+# Neither target produces a file of its own name.
+.PHONY: default clean
+
+default:
+       ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
+
+# -f keeps clean from failing when there are no .bin files to delete.
+clean:
+       rm -rf tmpbuilddirectory
+       rm -f *.bin
author	adash <adash>
	Tue, 9 Jun 2009 01:31:41 +0000 (01:31 +0000)
committer	adash <adash>
	Tue, 9 Jun 2009 01:31:41 +0000 (01:31 +0000)
Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Gene.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Genome.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/List.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/ListNode.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Pair.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Segments.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/Table.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java	[new file with mode: 0644]	patch \| blob
Robust/src/Benchmarks/SingleTM/Genome/makefile	[new file with mode: 0644]	patch \| blob