--- /dev/null
+/*
+ * Fixed-size bitmap holding one bit for every index in [0, numBit), stored in
+ * an int array.
+ *
+ * NUM_BIT_PER_WORD is the number of bits kept in one element of 'bits'. It
+ * must match the 32-bit width of an int: Java only uses the low five bits of
+ * an int shift distance, so a 64-bit word size would make bit i and bit i+32
+ * of the same word share one physical bit.
+ */
+public class Bitmap {
+  public int numBit;
+  public int numWord;
+  public int bits[];
+
+  public int NUM_BIT_PER_BYTE;
+  public int NUM_BIT_PER_WORD;
+
+
+  /* =============================================================================
+   * bitmap_alloc
+   * -- Creates a bitmap of myNumBit bits, all clear
+   * =============================================================================
+   */
+  Bitmap(int myNumBit) {
+    NUM_BIT_PER_BYTE = 8;
+    /* an int occupies 4 bytes */
+    NUM_BIT_PER_WORD = ((4) * NUM_BIT_PER_BYTE);
+
+    numBit = myNumBit;
+    numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD);
+
+    bits = new int[numWord];
+    clearAll();
+  }
+
+  /* Copy constructor: duplicates the size and every word of myBitMap */
+  Bitmap(Bitmap myBitMap) {
+    NUM_BIT_PER_BYTE = 8;
+    NUM_BIT_PER_WORD = ((4) * NUM_BIT_PER_BYTE);
+
+    numBit = myBitMap.numBit;
+    numWord = myBitMap.numWord;
+    bits = new int[numWord];
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = myBitMap.bits[i];
+    }
+  }
+
+  /* =============================================================================
+   * bitmap_set
+   * -- Sets ith bit to 1
+   * -- Returns TRUE on success, else FALSE (i outside [0, numBit))
+   * =============================================================================
+   */
+  boolean set (int i) {
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    bits[i/NUM_BIT_PER_WORD] |= (1 << (i % NUM_BIT_PER_WORD));
+
+    return true;
+  }
+
+
+  /* =============================================================================
+   * bitmap_clear
+   * -- Clears ith bit to 0
+   * -- Returns TRUE on success, else FALSE (i outside [0, numBit))
+   * =============================================================================
+   */
+  boolean clear (int i) {
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    bits[i/NUM_BIT_PER_WORD] &= ~(1 << (i % NUM_BIT_PER_WORD));
+
+    return true;
+  }
+
+
+  /* =============================================================================
+   * bitmap_clearAll
+   * -- Clears all bit to 0
+   * =============================================================================
+   */
+  void clearAll () {
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = 0;
+    }
+  }
+
+
+  /* =============================================================================
+   * bitmap_isSet
+   * -- Returns TRUE if ith bit is set, else FALSE
+   * -- An index outside [0, numBit) is reported as FALSE
+   * =============================================================================
+   */
+  boolean isSet (int i) {
+    /* range check comes first so an invalid index never touches the array */
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    return testBit(i);
+  }
+
+
+  /* =============================================================================
+   * bitmap_findClear
+   * -- Returns index of first clear bit at or after startIndex
+   * -- If start index is negative, will start from beginning
+   * -- If no clear bit is found, returns -1
+   * =============================================================================
+   */
+  int findClear (int startIndex) {
+    int i;
+
+    for (i = MAX(startIndex, 0); i < numBit; i++) {
+      if (!testBit(i)) {
+        return i;
+      }
+    }
+
+    return -1;
+  }
+
+
+  /* =============================================================================
+   * bitmap_findSet
+   * -- Returns index of first set bit at or after startIndex
+   * -- If start index is negative, will start from beginning
+   * -- If no set bit is found, returns -1
+   * =============================================================================
+   */
+  int findSet (int startIndex) {
+    int i;
+
+    for (i = MAX(startIndex, 0); i < numBit; i++) {
+      if (testBit(i)) {
+        return i;
+      }
+    }
+
+    return -1;
+  }
+
+
+  /* =============================================================================
+   * bitmap_getNumClear
+   * =============================================================================
+   */
+  int getNumClear () {
+    return (numBit - getNumSet());
+  }
+
+
+  /* =============================================================================
+   * bitmap_getNumSet
+   * -- Counts bit by bit so that the padding bits of the last word, which
+   *    toggleAll also flips, are never included
+   * =============================================================================
+   */
+  int getNumSet () {
+    int i;
+    int count = 0;
+    for (i = 0; i < numBit; i++) {
+      if (testBit(i)) {
+        count++;
+      }
+    }
+
+    return count;
+  }
+
+  /* =============================================================================
+   * bitmap_toggleAll
+   * -- Inverts every word, padding bits included
+   * =============================================================================
+   */
+  void toggleAll () {
+    int w;
+    for (w = 0; w < numWord; w++) {
+      bits[w] ^= -1;
+    }
+  }
+
+  /*
+   * Reads bit i without range checking; the caller guarantees 0 <= i < numBit.
+   * The comparison is "!= 0" rather than "> 0" because the mask for bit 31 is
+   * the sign bit and therefore negative.
+   */
+  boolean testBit (int i) {
+    return (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD))) != 0;
+  }
+
+  /* Integer division of a by b, rounded up when the remainder is positive */
+  int DIVIDE_AND_ROUND_UP(int a, int b) {
+    return (a/b) + (((a % b) > 0) ? (1) : (0));
+  }
+
+  /* Larger of a and b */
+  int MAX(int a, int b) {
+    return (a > b) ? a : b;
+  }
+}
--- /dev/null
+/*
+ * A gene: a string of 'length' nucleotides plus the bitmap that records which
+ * positions already serve as the start of a segment.
+ */
+public class Gene {
+  public int length;
+  public String contents;
+  public Bitmap startBitmapPtr; /* used for creating segments */
+
+  /* Creates an empty gene of myLength positions; call create() to fill it */
+  Gene(int myLength) {
+    length = myLength;
+    contents = "";
+    startBitmapPtr = new Bitmap(length);
+  }
+
+
+/* =============================================================================
+ * gene_create
+ * -- Fills 'contents' with 'length' characters drawn from a, c, g, t using
+ *    one random_generate() call per character
+ * =============================================================================
+ */
+  void create (Random randomObj) {
+    char[] alphabet = new char[4];
+    alphabet[0] = 'a';
+    alphabet[1] = 'c';
+    alphabet[2] = 'g';
+    alphabet[3] = 't';
+
+    char[] buffer = new char[length];
+
+    int pos = 0;
+    while (pos < length) {
+      int draw = (int)randomObj.random_generate();
+      /* fold negative draws so the remainder below is a valid index */
+      if (draw < 0) {
+        draw = -draw;
+      }
+      buffer[pos] = alphabet[draw % 4];
+      pos++;
+    }
+
+    contents = new String(buffer);
+  }
+}
--- /dev/null
+/*
+ * Benchmark driver. The instance built from the command line creates the
+ * gene, the segments and the sequencer; the instances built with the long
+ * constructor are worker threads sharing those objects. Every thread,
+ * including the main one, calls Sequencer.run between two barriers.
+ */
+public class Genome extends Thread {
+  int geneLength;
+  int segmentLength;
+  int minNumSegment;
+  int numThread;
+
+  int threadid;
+
+  /* shared by all threads of one run */
+  Random randomPtr;
+  Gene genePtr;
+  Segments segmentsPtr;
+  Sequencer sequencerPtr;
+
+  /*
+   * Parses the options in x and builds the gene, its segments and the
+   * sequencer. A thread count below one (including "not given", which leaves
+   * the field at 0) is replaced by 1 so that array sizes and partition
+   * arithmetic derived from it stay valid.
+   */
+  Genome(String x[]) {
+    parseCmdLine(x);
+    if(numThread < 1) {
+      numThread = 1;
+    }
+
+    randomPtr = new Random();
+    randomPtr.random_alloc();
+    randomPtr.random_seed(0);
+
+    genePtr = new Gene(geneLength);
+    genePtr.create(randomPtr);
+
+    segmentsPtr = new Segments(segmentLength, minNumSegment);
+    segmentsPtr.create(genePtr, randomPtr);
+
+    sequencerPtr = new Sequencer(geneLength, segmentLength, segmentsPtr);
+  }
+
+  /* Builds a worker that shares the already created benchmark objects */
+  Genome(int myThreadid, int myGeneLength, int mySegLength, int myMinNumSegs, int myNumThread, Random myRandomPtr, Gene myGenePtr, Segments mySegmentsPtr, Sequencer mySequencerPtr) {
+    threadid = myThreadid;
+    geneLength = myGeneLength;
+    segmentLength = mySegLength;
+    minNumSegment = myMinNumSegs;
+    numThread = myNumThread;
+
+    randomPtr = myRandomPtr;
+    genePtr = myGenePtr;
+    segmentsPtr = mySegmentsPtr;
+    sequencerPtr = mySequencerPtr;
+  }
+
+  /*
+   * Reads leading "-x value" options:
+   *   -g gene length, -s segment length, -n minimum number of segments,
+   *   -t number of threads.
+   * Parsing stops at the first argument that does not start with "-".
+   * Unknown options are skipped, and an option that is the last argument
+   * (no value follows) is ignored.
+   */
+  public void parseCmdLine(String args[]) {
+    int i = 0;
+    String arg;
+    while (i < args.length && args[i].startsWith("-")) {
+      arg = args[i++];
+      //check options
+      if(arg.equals("-g")) {
+        if(i < args.length) {
+          this.geneLength = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-s")) {
+        if(i < args.length) {
+          this.segmentLength = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-n")) {
+        if(i < args.length) {
+          this.minNumSegment = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-t")) {
+        if(i < args.length) {
+          this.numThread = new Integer(args[i++]).intValue();
+        }
+      }
+    }
+
+  }
+
+  /* Worker body: the same barrier / run / barrier sequence main() executes */
+  public void run() {
+    Barrier.enterBarrier();
+    Sequencer.run(threadid, numThread, randomPtr, sequencerPtr);
+    Barrier.enterBarrier();
+  }
+
+  public static void main(String x[]){
+
+    System.out.print("Creating gene and segments... ");
+    Genome g = new Genome(x);
+
+    System.out.println("done.");
+    System.out.println("Gene length = " + g.genePtr.length);
+    System.out.println("Segment length = " + g.segmentsPtr.length);
+    System.out.println("Number segments = " + g.segmentsPtr.contentsPtr.size());
+    System.out.println("Number threads = " + g.numThread);
+
+
+    Barrier.setBarrier(g.numThread);
+
+    /* Create and Start Threads */
+
+    String gene = g.genePtr.contents;
+    /* slot 0 stays empty: the main thread itself acts as thread 0 */
+    Genome[] gn = new Genome[g.numThread];
+
+    for(int i = 1; i<g.numThread; i++) {
+      gn[i] = new Genome(i, g.geneLength, g.segmentLength, g.minNumSegment, g.numThread, g.randomPtr, g.genePtr, g.segmentsPtr, g.sequencerPtr);
+    }
+
+    System.out.print("Sequencing gene... ");
+
+    for(int i = 1; i<g.numThread; i++) {
+      gn[i].start();
+    }
+
+    Barrier.enterBarrier();
+    Sequencer.run(0, g.numThread, g.randomPtr, g.sequencerPtr);
+    Barrier.enterBarrier();
+
+
+    System.out.println("done.");
+
+    /* Check result */
+    {
+      String sequence = g.sequencerPtr.sequence;
+      /* equals() reports "no" instead of failing when no sequence was built */
+      boolean result = gene.equals(sequence);
+      System.out.println("Sequence matches gene: " + (result ? "yes" : "no"));
+      //DEBUG
+      //if (result) {
+      //  System.out.println("gene = " + gene);
+      //  System.out.println("sequence = " + sequence);
+      //}
+    }
+
+  }
+}
--- /dev/null
+/*
+ * Chained hash table from String keys to String data. Each bucket is a
+ * sorted List of Pair objects; a key is stored at most once.
+ */
+public class Hashtable {
+  List buckets[];
+  int numBucket;
+  int size;
+  int resizeRatio;
+  int growthFactor;
+
+
+  /*
+   * Creates a table with initNumBucket buckets. A negative resizeRatio or
+   * growthFactor selects the default value 3. The parameters shadow the
+   * fields of the same name, so the fields are assigned through 'this'.
+   */
+  public Hashtable (int initNumBucket, int resizeRatio, int growthFactor) {
+
+    allocBuckets(initNumBucket);
+    numBucket = initNumBucket;
+    size = 0;
+    this.resizeRatio = ((resizeRatio < 0) ? 3 : resizeRatio);
+    this.growthFactor = ((growthFactor < 0) ? 3 : growthFactor);
+  }
+
+  /*
+   * Inserts the pair (keyPtr, dataPtr).
+   * Returns true when the pair was added, false when the key was already
+   * present (the table is left unchanged in that case).
+   */
+  public boolean TMhashtable_insert (String keyPtr, String dataPtr) {
+    int i = hashSegment(keyPtr) % numBucket;
+    /* hashSegment can still be negative for Integer.MIN_VALUE; the magnitude
+     * of the remainder is below numBucket, so negating it is safe */
+    if (i < 0) {
+      i = -i;
+    }
+
+    /* List.insert itself rejects a pair whose key is already in the chain,
+     * so no separate lookup is needed before inserting */
+    if (buckets[i].insert(new Pair(keyPtr, dataPtr)) == false) {
+      return false;
+    }
+
+    size++;
+
+    return true;
+  }
+
+  /* Allocates numBucket + 1 empty chains and stores them in 'buckets' */
+  void allocBuckets (int numBucket) {
+    int i;
+    /* Allocate bucket: extra bucket is dummy for easier iterator code */
+    buckets = new List[numBucket+1];
+
+    for (i = 0; i < (numBucket + 1); i++) {
+      List chainPtr = new List();
+      buckets[i] = chainPtr;
+    }
+  }
+
+  /*
+   * Hash of str. A negative result is negated before it is returned; the one
+   * value this cannot make positive is Integer.MIN_VALUE, whose negation
+   * overflows back to itself.
+   */
+  int hashSegment (String str) {
+    int hash = 0;
+
+    int index = 0;
+    /* Note: Do not change this hashing scheme */
+    for(index = 0; index < str.length(); index++) {
+      char c = str.charAt(index);
+      hash = c + (hash << 6) + (hash << 16) - hash;
+    }
+
+    if(hash < 0) hash *= -1;
+
+    return hash;
+  }
+}
--- /dev/null
+/*
+ * Singly linked list of Pair objects kept in ascending order of
+ * Pair.firstPtr. 'head' is a sentinel node that carries no data.
+ */
+public class List {
+  ListNode head;
+  int size;
+
+  /* Creates an empty list consisting of the sentinel only */
+  public List () {
+    size = 0;
+    head = new ListNode();
+    head.nextPtr = null;
+    head.dataPtr = null;
+  }
+
+  /* Returns the stored pair whose key equals dataPtr's key, or null */
+  Pair find (Pair dataPtr) {
+    ListNode candidate = findPrevious(dataPtr).nextPtr;
+
+    if ((candidate != null) && (compareSegment(candidate.dataPtr, dataPtr) == 0)) {
+      return candidate.dataPtr;
+    }
+
+    return null;
+  }
+
+  /*
+   * Returns the last node whose key is smaller than dataPtr's key, or the
+   * sentinel when no stored key is smaller.
+   */
+  ListNode findPrevious (Pair dataPtr) {
+    ListNode trail = head;
+
+    while ((trail.nextPtr != null) && (compareSegment(trail.nextPtr.dataPtr, dataPtr) < 0)) {
+      trail = trail.nextPtr;
+    }
+
+    return trail;
+  }
+
+  /*
+   * Inserts dataPtr at its sorted position.
+   * Returns false, without modifying the list, when an equal key is present.
+   */
+  boolean insert (Pair dataPtr) {
+    ListNode before = findPrevious(dataPtr);
+    ListNode after = before.nextPtr;
+
+    if (after != null) {
+      if (compareSegment(after.dataPtr, dataPtr) == 0) {
+        return false;
+      }
+    }
+
+    ListNode fresh = new ListNode(dataPtr);
+    fresh.nextPtr = after;
+    before.nextPtr = fresh;
+    size++;
+
+    return true;
+  }
+
+  /* Orders two pairs by String.compareTo on their firstPtr keys */
+  int compareSegment (Pair a, Pair b) {
+    return a.firstPtr.compareTo(b.firstPtr);
+  }
+}
--- /dev/null
+/* One node of List: a Pair plus the link to the following node */
+public class ListNode {
+  Pair dataPtr;
+  ListNode nextPtr;
+
+  /* Node without data and without successor */
+  public ListNode () {
+    this.nextPtr = null;
+    this.dataPtr = null;
+  }
+
+  /* Node holding myDataPtr, without successor */
+  public ListNode (Pair myDataPtr) {
+    this.nextPtr = null;
+    this.dataPtr = myDataPtr;
+  }
+}
--- /dev/null
+/* Two String references held together */
+public class Pair {
+  String firstPtr;
+  String secondPtr;
+
+  /* Both members start out null */
+  public Pair() {
+    this.secondPtr = null;
+    this.firstPtr = null;
+  }
+
+  /* Stores the two given references as they are */
+  public Pair(String myFirstPtr, String mySecondPtr) {
+    this.secondPtr = mySecondPtr;
+    this.firstPtr = myFirstPtr;
+  }
+}
--- /dev/null
+/*
+ * The set of fixed-length substrings ("segments") cut out of a gene.
+ * 'contentsPtr' receives every segment produced by create().
+ */
+public class Segments {
+  public int length;
+  public int minNum;
+  Vector contentsPtr;
+  /* private: */
+  String strings[];
+
+  /* Prepares storage for segments of myLength characters, myMinNum of which
+   * are picked at random by create() */
+  Segments (int myLength, int myMinNum) {
+    minNum = myMinNum;
+    length = myLength;
+
+    strings = new String[minNum];
+    contentsPtr = new Vector(minNum);
+  }
+
+
+  /* =============================================================================
+   * segments_create
+   * -- Populates 'contentsPtr'
+   * -- First adds minNum segments at random start positions, then a segment
+   *    at position 0 if none starts there, then extra segments wherever
+   *    length-1 consecutive start positions are unused
+   * -- Every chosen start position is marked in genePtr.startBitmapPtr
+   * =============================================================================
+   */
+  void create (Gene genePtr, Random randomPtr) {
+    String geneString = genePtr.contents;
+    int geneLength = genePtr.length;
+    Bitmap startBitmapPtr = genePtr.startBitmapPtr;
+    /* number of positions at which a full-length segment can start */
+    int numStart = geneLength - length + 1;
+    int i;
+    int maxZeroRunLength;
+
+    /* Pick some random segments to start */
+    for (i = 0; i < minNum; i++) {
+      int j = (int)(randomPtr.random_generate() % numStart);
+      /* NOTE(review): Gene.create treats random_generate() as possibly
+       * negative; fold the remainder the same way so j is a valid start.
+       * Its magnitude is below numStart, so the negation cannot overflow. */
+      if (j < 0) {
+        j = -j;
+      }
+      startBitmapPtr.set(j);
+      strings[i] = geneString.substring(j, j+length);
+      contentsPtr.addElement(strings[i]);
+    }
+
+    /* Make sure segment covers start */
+    i = 0;
+    if (!startBitmapPtr.isSet(i)) {
+      String string = geneString.substring(i, i+length);
+      contentsPtr.addElement(string);
+      startBitmapPtr.set(i);
+    }
+
+    /* Add extra segments to fill holes and ensure overlap */
+    maxZeroRunLength = length - 1;
+    for (i = 0; i < numStart; i++) {
+      int i_stop = Math.imin((i+maxZeroRunLength), numStart);
+      /* advance i to the next used start position, or to i_stop */
+      for ( /* continue */; i < i_stop; i++) {
+        if (startBitmapPtr.isSet(i)) {
+          break;
+        }
+      }
+      if (i == i_stop) {
+        /* Found big enough hole */
+        i = i - 1;
+        String string = geneString.substring(i, i+length);
+        contentsPtr.addElement(string);
+        startBitmapPtr.set(i);
+      }
+    }
+  }
+}
--- /dev/null
+/*
+ * Reassembles a gene from its segments. Sequencer.run is executed by every
+ * thread; the threads meet at Barrier.enterBarrier() between the phases and
+ * shared structures are updated inside atomic blocks.
+ */
+public class Sequencer {
+
+  /* Result of run(); stays null until thread 0 has executed step 3 */
+  public String sequence;
+
+  public Segments segmentsPtr;
+
+  /* For removing duplicate segments */
+  Hashtable uniqueSegmentsPtr;
+
+  /* For matching segments */
+  endInfoEntry endInfoEntries[];
+  Table startHashToConstructEntryTables[];
+
+  /* For constructing sequence */
+  constructEntry constructEntries[];
+  Table hashToConstructEntryTable;
+
+  /* Segment length given to the constructor */
+  int segmentLength;
+
+
+  /* =============================================================================
+   * sequencer_alloc
+   * -- Allocates one endInfoEntry and one constructEntry for each of the
+   *    myGeneLength - mySegmentLength + 1 possible unique segments, and one
+   *    start-hash table per prefix length 1 .. mySegmentLength-1
+   * =============================================================================
+   */
+  public Sequencer (int myGeneLength, int mySegmentLength, Segments mySegmentsPtr) {
+
+    int maxNumUniqueSegment = myGeneLength - mySegmentLength + 1;
+    int i;
+
+    uniqueSegmentsPtr = new Hashtable(myGeneLength, -1, -1);
+
+    /* For finding a matching entry */
+    endInfoEntries = new endInfoEntry[maxNumUniqueSegment];
+    for (i = 0; i < maxNumUniqueSegment; i++) {
+      endInfoEntries[i] = new endInfoEntry(true, 1);
+    }
+
+    startHashToConstructEntryTables = new Table[mySegmentLength];
+    for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */
+      startHashToConstructEntryTables[i] = new Table(myGeneLength);
+    }
+    segmentLength = mySegmentLength;
+
+    /* For constructing sequence */
+    constructEntries = new constructEntry[maxNumUniqueSegment];
+
+    for (i= 0; i < maxNumUniqueSegment; i++) {
+      constructEntries[i] = new constructEntry(null, true, 0, null, null, null, 0, segmentLength);
+    }
+    hashToConstructEntryTable = new Table(myGeneLength);
+
+    segmentsPtr = mySegmentsPtr;
+  }
+
+
+  /* =============================================================================
+   * sequencer_run
+   * -- threadNum is this thread's id in [0, numOfThreads)
+   * -- All numOfThreads threads must call this method, because it waits at
+   *    barriers; thread 0 alone writes sequencerPtr.sequence
+   * =============================================================================
+   */
+
+  public static void run (int threadNum, int numOfThreads, Random randomPtr, Sequencer sequencerPtr) {
+
+    int threadId = threadNum;
+
+    Segments segmentsPtr = sequencerPtr.segmentsPtr;
+
+    Hashtable uniqueSegmentsPtr = sequencerPtr.uniqueSegmentsPtr;
+    endInfoEntry endInfoEntries[] = sequencerPtr.endInfoEntries;
+    Table startHashToConstructEntryTables[] = sequencerPtr.startHashToConstructEntryTables;
+    constructEntry constructEntries[] = sequencerPtr.constructEntries;
+    Table hashToConstructEntryTable = sequencerPtr.hashToConstructEntryTable;
+
+    Vector segmentsContentsPtr = segmentsPtr.contentsPtr;
+    int numSegment = segmentsContentsPtr.size();
+    int segmentLength = segmentsPtr.length;
+
+    int i;
+    int j;
+    int i_start;
+    int i_stop;
+    int numUniqueSegment;
+    int substringLength;
+    int entryIndex;
+
+    /* number of segments inserted per atomic block in step 1 */
+    int CHUNK_STEP1 = 12;
+
+    /*
+     * Step 1: Remove duplicate segments
+     */
+    int numThread = numOfThreads;
+    {
+      /* Choose disjoint segments [i_start,i_stop) for each thread */
+      int partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
+      i_start = threadId * partitionSize;
+      if (threadId == (numThread - 1)) {
+        i_stop = numSegment;
+      } else {
+        i_stop = i_start + partitionSize;
+      }
+    }
+
+    for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
+      atomic {
+        int ii;
+        int ii_stop = Math.imin(i_stop, (i+CHUNK_STEP1));
+        for (ii = i; ii < ii_stop; ii++) {
+          String segment = (String)segmentsContentsPtr.elementAt(ii);
+          /* a false result only means the segment is a duplicate */
+          uniqueSegmentsPtr.TMhashtable_insert(segment, segment);
+        } /* ii */
+      }
+    }
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 2a: Iterate over unique segments and compute hashes.
+     *
+     * For the gene "atcg", the hashes for the end would be:
+     *
+     *     "t", "tc", and "tcg"
+     *
+     * And for the gene "tcgg", the hashes for the start would be:
+     *
+     *     "t", "tc", and "tcg"
+     *
+     * The names are "end" and "start" because if a matching pair is found,
+     * they are the substring of the end part of the pair and the start
+     * part of the pair respectively. In the above example, "tcg" is the
+     * matching substring so:
+     *
+     *     (end)    (start)
+     *     a[tcg] + [tcg]g  = a[tcg]g    (overlap = "tcg")
+     */
+
+    /* uniqueSegmentsPtr is constant now */
+    numUniqueSegment = uniqueSegmentsPtr.size;
+    entryIndex = 0;
+
+    {
+      /* Choose disjoint segments [i_start,i_stop) for each thread */
+      int num = uniqueSegmentsPtr.numBucket;
+      int partitionSize = (num + numThread/2) / numThread; /* with rounding */
+      i_start = threadId * partitionSize;
+      if (threadId == (numThread - 1)) {
+        i_stop = num;
+      } else {
+        i_stop = i_start + partitionSize;
+      }
+    }
+
+    {
+      /* Approximate disjoint segments of element allocation in constructEntries */
+      int partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+      entryIndex = threadId * partitionSize;
+    }
+
+    for (i = i_start; i < i_stop; i++) {
+      List chainPtr = uniqueSegmentsPtr.buckets[i];
+      ListNode it = chainPtr.head;
+
+      while(it.nextPtr != null) {
+        it = it.nextPtr;
+        String segment = it.dataPtr.firstPtr;
+        int newj;
+        int startHash;
+
+        /* Find an empty constructEntries entry */
+        atomic {
+          while(constructEntries[entryIndex].segment != null) {
+            entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
+          }
+          constructEntries[entryIndex].segment = segment;
+        }
+
+        constructEntry constructEntryPtr = constructEntries[entryIndex];
+
+        entryIndex = (entryIndex + 1) % numUniqueSegment;
+
+        /*
+         * Save hashes (sdbm algorithm) of segment substrings
+         *
+         * endHashes will be computed for shorter substrings after matches
+         * have been made (in the next phase of the code). This will reduce
+         * the number of substrings for which hashes need to be computed.
+         *
+         * Since we can compute startHashes incrementally, we go ahead
+         * and compute all of them here.
+         */
+        /* constructEntryPtr is local now */
+        constructEntryPtr.endHash = hashString(segment.substring(1));
+
+        startHash = 0;
+        for (newj = 1; newj < segmentLength; newj++) {
+          startHash = segment.charAt(newj-1) + (startHash << 6) + (startHash << 16) - startHash;
+          atomic {
+            startHashToConstructEntryTables[newj].table_insert(startHash, constructEntryPtr);
+          }
+
+        }
+
+        /*
+         * For looking up construct entries quickly
+         * (newj equals segmentLength here, so this adds the last character)
+         */
+        startHash = segment.charAt(newj-1) + (startHash << 6) + (startHash << 16) - startHash;
+        atomic {
+          hashToConstructEntryTable.table_insert(startHash, constructEntryPtr);
+        }
+      }
+    }
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 2b: Match ends to starts by using hash-based string comparison.
+     */
+    for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {
+
+      Table startHashToConstructEntryTablePtr = startHashToConstructEntryTables[substringLength];
+      LinkedList buckets[] = startHashToConstructEntryTablePtr.buckets;
+      int numBucket = startHashToConstructEntryTablePtr.numBucket;
+
+      int index_start;
+      int index_stop;
+
+      {
+        /* Choose disjoint segments [index_start,index_stop) for each thread */
+        int partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+        index_start = threadId * partitionSize;
+        if (threadId == (numThread - 1)) {
+          index_stop = numUniqueSegment;
+        } else {
+          index_stop = index_start + partitionSize;
+        }
+      }
+
+      /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */
+      for (entryIndex = index_start;
+           entryIndex < index_stop;
+           entryIndex += endInfoEntries[entryIndex].jumpToNext)
+      {
+        if (!endInfoEntries[entryIndex].isEnd) {
+          continue;
+        }
+
+        /* ConstructEntries[entryIndex] is local data */
+        constructEntry endConstructEntryPtr = constructEntries[entryIndex];
+        String endSegment = endConstructEntryPtr.segment;
+        int endHash = endConstructEntryPtr.endHash;
+
+        /* Same sign folding as Table.table_insert, so that a negative
+         * endHash (hashString can return Integer.MIN_VALUE) selects the
+         * bucket the matching start was stored in */
+        int bucketIndex = endHash % numBucket;
+        if (bucketIndex < 0) {
+          bucketIndex = -bucketIndex;
+        }
+
+        LinkedList chainPtr = buckets[bucketIndex]; /* buckets: constant data */
+        LinkedListIterator it = (LinkedListIterator)chainPtr.iterator();
+        while (it.hasNext()) {
+          constructEntry startConstructEntryPtr = (constructEntry)it.next();
+          String startSegment = startConstructEntryPtr.segment;
+          int newLength = 0;
+
+          /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
+          atomic {
+            if(startConstructEntryPtr.isStart &&
+               (endConstructEntryPtr.startPtr != startConstructEntryPtr) &&
+               (startSegment.substring(0, substringLength).compareTo(endSegment.substring(segmentLength-substringLength)) == 0))
+            {
+              startConstructEntryPtr.isStart = false;
+              constructEntry startConstructEntry_endPtr;
+              constructEntry endConstructEntry_startPtr;
+
+              /* Update endInfo (appended something so no inter end) */
+              endInfoEntries[entryIndex].isEnd = false;
+              /* Update segment chain construct info */
+              startConstructEntry_endPtr = startConstructEntryPtr.endPtr;
+              endConstructEntry_startPtr = endConstructEntryPtr.startPtr;
+              startConstructEntry_endPtr.startPtr = endConstructEntry_startPtr;
+              endConstructEntryPtr.nextPtr = startConstructEntryPtr;
+              endConstructEntry_startPtr.endPtr = startConstructEntry_endPtr;
+              endConstructEntryPtr.overlap = substringLength;
+              newLength = endConstructEntry_startPtr.length + startConstructEntryPtr.length - substringLength;
+              endConstructEntry_startPtr.length = newLength;
+            }
+          }
+
+          if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */
+            break;
+          }
+        } /* iterate over chain */
+
+      } /* for (endIndex < numUniqueSegment) */
+
+      Barrier.enterBarrier();
+
+      /*
+       * Step 2c: Update jump values and hashes
+       *
+       * endHash entries of all remaining ends are updated to the next
+       * substringLength. Additionally jumpToNext entries are updated such
+       * that they allow to skip non-end entries. Currently this is sequential
+       * because parallelization did not perform better.
+       */
+
+      if (threadId == 0) {
+        if (substringLength > 1) {
+          int index = segmentLength - substringLength + 1;
+          /* initialization of j and i: with i being the next end after j=0.
+           * The length test stops the scan when no entry after 0 is an end
+           * any more, instead of indexing past the array. */
+          for (i = 1;
+               (i < endInfoEntries.length) && !endInfoEntries[i].isEnd;
+               i+=endInfoEntries[i].jumpToNext) {
+            /* find first non-null */
+            ;
+          }
+          /* entry 0 is handled separately from the loop below */
+          endInfoEntries[0].jumpToNext = i;
+          if (endInfoEntries[0].isEnd) {
+            String segment = constructEntries[0].segment;
+            constructEntries[0].endHash = hashString(segment.substring(index));
+          }
+          /* Continue scanning (do not reset i) */
+          for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
+
+            if (endInfoEntries[i].isEnd) {
+              String segment = constructEntries[i].segment;
+              constructEntries[i].endHash = hashString(segment.substring(index));
+              endInfoEntries[j].jumpToNext = Math.imax(1, (i - j));
+              j = i;
+            }
+          }
+          endInfoEntries[j].jumpToNext = i - j;
+        }
+      }
+
+      Barrier.enterBarrier();
+
+    } /* for (substringLength > 0) */
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 3: Build sequence string
+     *
+     * Each chain is appended from its start entry onwards; from every
+     * segment after the first of a chain, the characters overlapping its
+     * predecessor are dropped.
+     */
+    if (threadId == 0) {
+      for (i = 0; i < numUniqueSegment; i++) {
+        /* If there are several start segments, we append in arbitrary order */
+        constructEntry constructEntryPtr = constructEntries[i];
+        if (constructEntryPtr.isStart) {
+          int prevOverlap = 0;
+          do {
+            String piece = constructEntryPtr.segment;
+            if(sequencerPtr.sequence == null) {
+              sequencerPtr.sequence = piece;
+            } else {
+              sequencerPtr.sequence = sequencerPtr.sequence.concat(piece.substring(prevOverlap));
+            }
+            prevOverlap = constructEntryPtr.overlap;
+            constructEntryPtr = constructEntryPtr.nextPtr;
+          } while (constructEntryPtr != null);
+        }
+      }
+    }
+  }
+
+  /* =============================================================================
+   * hashString
+   * -- uses sdbm hash function
+   * -- a negative sum is negated before it is returned; Integer.MIN_VALUE is
+   *    the one value that stays negative
+   * =============================================================================
+   */
+  static int hashString (String str)
+  {
+    int hash = 0;
+
+    int index = 0;
+    // Note: Do not change this hashing scheme
+    for(index = 0; index < str.length(); index++) {
+      char c = str.charAt(index);
+      hash = c + (hash << 6) + (hash << 16) - hash;
+    }
+
+    if(hash < 0) hash *= -1;
+
+    return hash;
+  }
+}
--- /dev/null
+/*
+ * Hash table with a fixed number of LinkedList buckets. The caller supplies
+ * the hash value; an object is stored at most once per bucket.
+ */
+public class Table {
+
+  LinkedList buckets[];
+  int numBucket;
+
+
+  /* =============================================================================
+   * table_alloc
+   * -- Creates myNumBucket empty buckets
+   * =============================================================================
+   */
+  Table (int myNumBucket) {
+
+    int i;
+
+    buckets = new LinkedList[myNumBucket];
+    for(i = 0; i < myNumBucket; i++) {
+      buckets[i] = new LinkedList();
+    }
+
+    numBucket = myNumBucket;
+
+  }
+
+
+  /* =============================================================================
+   * table_insert
+   * -- Returns TRUE if successful, else FALSE (dataPtr already in the bucket)
+   * =============================================================================
+   */
+  boolean table_insert (int hash, Object dataPtr) {
+    int i = bucketIndex(hash);
+    if(buckets[i].contains(dataPtr)) {
+      return false;
+    }
+    buckets[i].add(dataPtr);
+    return true;
+  }
+
+  /* =============================================================================
+   * table_remove
+   * -- Returns TRUE if successful, else FALSE (dataPtr not in the bucket)
+   * -- Uses the same bucket selection as table_insert, so an entry inserted
+   *    with a negative hash can be removed with that hash
+   * =============================================================================
+   */
+  boolean table_remove (int hash, Object dataPtr) {
+
+    int i = bucketIndex(hash);
+    if (buckets[i].contains(dataPtr)) {
+      buckets[i].remove(dataPtr);
+      return true;
+    }
+
+    return false;
+
+  }
+
+  /*
+   * Maps a hash to a bucket number in [0, numBucket). The remainder carries
+   * the sign of the hash and its magnitude is below numBucket, so negating a
+   * negative remainder cannot overflow.
+   */
+  int bucketIndex (int hash) {
+    int i = hash % numBucket;
+    if(i < 0) i *= -1;
+    return i;
+  }
+
+}
--- /dev/null
+/*
+ * Per-segment record used while segments are linked into chains:
+ * startPtr/endPtr name the first and last entry of the chain this entry
+ * belongs to, nextPtr the following entry, overlap the number of characters
+ * shared with it, and length the chain length in characters.
+ */
+public class constructEntry {
+  boolean isStart;
+  String segment;
+  int endHash;
+  constructEntry startPtr;
+  constructEntry nextPtr;
+  constructEntry endPtr;
+  int overlap;
+  int length;
+
+  /*
+   * Note: myStartPtr and myEndPtr are not used. A new entry always forms a
+   * chain of its own, so startPtr and endPtr refer to the entry itself.
+   */
+  constructEntry(String mySegment, boolean myStart, int myEndHash, constructEntry myStartPtr, constructEntry myNextPtr, constructEntry myEndPtr, int myOverlap, int myLength) {
+    this.startPtr = this;
+    this.endPtr = this;
+    this.nextPtr = myNextPtr;
+
+    this.segment = mySegment;
+    this.isStart = myStart;
+    this.endHash = myEndHash;
+    this.overlap = myOverlap;
+    this.length = myLength;
+  }
+
+  /*
+   * Field-by-field comparison: the segment text via compareTo, the three
+   * links by reference, everything else by value. The checks run in
+   * declaration order of the original expression and stop at the first
+   * mismatch.
+   */
+  boolean equals(constructEntry copy) {
+    if (segment.compareTo(copy.segment) != 0) {
+      return false;
+    }
+    if (isStart != copy.isStart) {
+      return false;
+    }
+    if (endHash != copy.endHash) {
+      return false;
+    }
+    if (startPtr != copy.startPtr) {
+      return false;
+    }
+    if (nextPtr != copy.nextPtr) {
+      return false;
+    }
+    if (endPtr != copy.endPtr) {
+      return false;
+    }
+    if (overlap != copy.overlap) {
+      return false;
+    }
+    return (length == copy.length);
+  }
+}
--- /dev/null
+ /*
+  * Bookkeeping for one segment entry: whether it is still the end of a
+  * chain, and the step to add to its index to reach the next entry worth
+  * visiting.
+  */
+ public class endInfoEntry {
+   boolean isEnd;
+   int jumpToNext;
+
+   /* Entry that is not an end and has a jump of 0 */
+   public endInfoEntry() {
+     this.jumpToNext = 0;
+     this.isEnd = false;
+   }
+
+   /* Entry with the given end flag and jump */
+   public endInfoEntry(boolean myEnd, int myNext) {
+     this.jumpToNext = myNext;
+     this.isEnd = myEnd;
+   }
+ }
--- /dev/null
+# Builds the Genome benchmark by handing the sources below to the project
+# build script.
+MAINCLASS=Genome
+SRC=${MAINCLASS}.java \
+	../common/Random.java \
+	Bitmap.java \
+	Gene.java \
+	Segments.java \
+	endInfoEntry.java \
+	constructEntry.java \
+	../../../ClassLibrary/JavaSTM/Barrier.java \
+	Sequencer.java \
+	Table.java \
+	Hashtable.java
+FLAGS=-mainclass ${MAINCLASS} -singleTM -optimize -dcopts -abcclose -fastmemcpy -joptimize
+
+# Neither target creates a file named after itself.
+.PHONY: default clean
+
+default:
+	../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
+
+# -f keeps "make clean" from failing when no .bin file exists
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin