From: adash <adash>
Date: Tue, 9 Jun 2009 01:31:41 +0000 (+0000)
Subject: create new Genome directory and add files
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=200429af1ad2fe06490d7f5730ad4f7f8c9962fc;p=IRC.git

create new Genome directory and add files
clean up the comments
change C longs -> ints
works fine for singleTM version with all compiler optimizations
---

diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java b/Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java
new file mode 100644
index 00000000..f9115244
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Bitmap.java
@@ -0,0 +1,199 @@
+/*
+ * Fixed-size bit set stored in an int array: bit i lives in word
+ * i / NUM_BIT_PER_WORD at position i % NUM_BIT_PER_WORD. The banner comments
+ * below keep the C-style names (bitmap_alloc, bitmap_set, ...).
+ */
+public class Bitmap {
+  public int numBit;
+  public int numWord;
+  public int bits[];
+
+  public int NUM_BIT_PER_BYTE;
+  public int NUM_BIT_PER_WORD;
+
+
+  /* =============================================================================
+   * bitmap_alloc
+   * -- Creates a bitmap of myNumBit bits, all initially clear
+   * =============================================================================
+   */
+  Bitmap(int myNumBit) {
+
+    NUM_BIT_PER_BYTE = 8;
+    NUM_BIT_PER_WORD = ((4) * NUM_BIT_PER_BYTE); /* a word is an int: 4 bytes, not 8 */
+
+    numBit = myNumBit;
+    numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD);
+
+    bits = new int[numWord];
+
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = 0;
+    }
+  }
+
+  Bitmap(Bitmap myBitMap) {
+    NUM_BIT_PER_BYTE = 8;
+    NUM_BIT_PER_WORD = ((4) * NUM_BIT_PER_BYTE); /* a word is an int: 4 bytes, not 8 */
+
+
+    numBit = myBitMap.numBit;
+    numWord = myBitMap.numWord;
+    bits = new int[numWord];
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = myBitMap.bits[i];
+    }
+  }
+
+  /* =============================================================================
+   * bitmap_set
+   * -- Sets ith bit to 1
+   * -- Returns TRUE on success, else FALSE
+   * =============================================================================
+   */
+  boolean set (int i) {
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    bits[((int)i)/NUM_BIT_PER_WORD] |= (1 << (i % NUM_BIT_PER_WORD));
+
+    return true;
+  }
+
+
+  /* =============================================================================
+   * bitmap_clear
+   * -- Clears ith bit to 0
+   * -- Returns TRUE on success, else FALSE
+   * =============================================================================
+   */
+  boolean clear (int i) {
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    bits[((int)i)/NUM_BIT_PER_WORD] &= ~(1 << (i % NUM_BIT_PER_WORD));
+
+    return true;
+  }
+
+
+  /* =============================================================================
+   * bitmap_clearAll
+   * -- Clears all bit to 0
+   * =============================================================================
+   */
+  void clearAll () {
+    int i = 0;
+    for(i = 0; i < numWord; i++) {
+      bits[i] = 0;
+    }
+  }
+
+
+  /* =============================================================================
+   * bitmap_isSet
+   * -- Returns TRUE if ith bit is set, else FALSE
+   * =============================================================================
+   */
+  boolean isSet (int i) {
+    /* Range-check before indexing so an invalid i cannot read outside bits[] */
+    if ((i < 0) || (i >= numBit)) {
+      return false;
+    }
+
+    int word = bits[i / NUM_BIT_PER_WORD];
+    int mask = (1 << (i % NUM_BIT_PER_WORD));
+    /* != 0 rather than > 0: the mask for the top bit of a word is negative */
+    return ((word & mask) != 0);
+  }
+
+
+  /* =============================================================================
+   * bitmap_findClear
+   * -- Returns index of first clear bit
+   * -- If start index is negative, will start from beginning
+   * -- If all bits are set, returns -1
+   * =============================================================================
+   */
+  int findClear (int startIndex) {
+    int i;
+    /* The bit has to be re-tested for every i, so the test lives in the loop */
+    for (i = MAX(startIndex, 0); i < numBit; i++) {
+      if (!isSet(i)) {
+        return i;
+      }
+    }
+
+    return -1;
+  }
+
+
+  /* =============================================================================
+   * bitmap_findSet
+   * -- Returns index of first set bit
+   * -- If all bits are clear, returns -1
+   * =============================================================================
+   */
+  int findSet (int startIndex) {
+    int i;
+
+    for (i = MAX(startIndex, 0); i < numBit; i++) {
+      /* isSet() keeps (word & mask) in its own parentheses; '&' binds looser than a comparison */
+      if (isSet(i)) {
+        return i;
+      }
+    }
+
+    return -1;
+  }
+
+
+  /* =============================================================================
+   * bitmap_getNumClear
+   * =============================================================================
+   */
+  int getNumClear () {
+    return (numBit - getNumSet());
+  }
+
+
+  /* =============================================================================
+   * bitmap_getNumSet
+   * =============================================================================
+   */
+  int getNumSet () {
+    int i;
+    int count = 0;
+    for (i = 0; i < numBit; i++) {
+      /* isSet() keeps (word & mask) in its own parentheses; '&' binds looser than a comparison */
+      if (isSet(i)) {
+        count++;
+      }
+    }
+
+    return count;
+  }
+
+  /* =============================================================================
+   * bitmap_toggleAll
+   * =============================================================================
+   */
+  void toggleAll () {
+    int w;
+    for (w = 0; w < numWord; w++) {
+      bits[w] ^= -1;
+    }
+  }
+
+  int DIVIDE_AND_ROUND_UP(int a, int b) {
+    return (a/b) + (((a % b) > 0) ? (1) : (0));
+  }
+
+  int MAX(int a, int b) {
+    return (a > b) ? a : b;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Gene.java b/Robust/src/Benchmarks/SingleTM/Genome/Gene.java
new file mode 100644
index 00000000..c69b3882
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Gene.java
@@ -0,0 +1,41 @@
+/*
+ * A gene: 'length' nucleotides drawn from {a, c, g, t}, plus the bitmap that
+ * Segments.create uses to record which start positions are already covered.
+ */
+public class Gene {
+  public int length;
+  public String contents;
+  public Bitmap startBitmapPtr; /* used for creating segments */
+
+  Gene(int myLength) {
+    length = myLength;
+    contents = "";
+    startBitmapPtr = new Bitmap(length);
+  }
+
+
+  /* =============================================================================
+   * gene_create
+   * -- Populate contents with random gene
+   * =============================================================================
+   */
+  void create (Random randomObj) {
+    int i;
+    char[] nucleotides = new char[4];
+    char[] arrayContents = new char[length];
+    nucleotides[0] = 'a';
+    nucleotides[1] = 'c';
+    nucleotides[2] = 'g';
+    nucleotides[3] = 't';
+
+    for (i = 0; i < length; i++) {
+      int legitimateNumber = (int)randomObj.random_generate();
+      if(legitimateNumber < 0) {
+        legitimateNumber *= -1;
+      }
+      arrayContents[i] = nucleotides[legitimateNumber % 4];
+    }
+
+    contents = new String(arrayContents);
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Genome.java b/Robust/src/Benchmarks/SingleTM/Genome/Genome.java
new file mode 100644
index 00000000..4fa26000
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Genome.java
@@ -0,0 +1,137 @@
+/*
+ * Benchmark driver. main() builds a random gene, cuts it into overlapping
+ * segments, runs Sequencer.run on numThread threads (the main thread acts as
+ * thread 0) and reports whether the rebuilt sequence equals the gene.
+ * Options: -g <gene length> -s <segment length> -n <min segments> -t <threads>
+ */
+public class Genome extends Thread {
+  int geneLength;
+  int segmentLength;
+  int minNumSegment;
+  int numThread;
+
+  int threadid;
+
+  // add segments, random, etc to member variables
+  // include in constructor
+  // allows for passing in thread run function
+  Random randomPtr;
+  Gene genePtr;
+  Segments segmentsPtr;
+  Sequencer sequencerPtr;
+
+  Genome(String x[]) {
+    parseCmdLine(x);
+    if(numThread == 0) {
+      numThread = 1;
+    }
+
+    randomPtr = new Random();
+    randomPtr.random_alloc();
+    randomPtr.random_seed(0);
+
+    genePtr = new Gene(geneLength);
+    genePtr.create(randomPtr);
+
+    segmentsPtr = new Segments(segmentLength, minNumSegment);
+    segmentsPtr.create(genePtr, randomPtr);
+
+    sequencerPtr = new Sequencer(geneLength, segmentLength, segmentsPtr);
+  }
+
+  Genome(int myThreadid, int myGeneLength, int mySegLength, int myMinNumSegs, int myNumThread, Random myRandomPtr, Gene myGenePtr, Segments mySegmentsPtr, Sequencer mySequencerPtr) {
+    threadid = myThreadid;
+    geneLength = myGeneLength;
+    segmentLength = mySegLength;
+    minNumSegment = myMinNumSegs;
+    numThread = myNumThread;
+
+    randomPtr = myRandomPtr;
+    genePtr = myGenePtr;
+    segmentsPtr = mySegmentsPtr;
+    sequencerPtr = mySequencerPtr;
+  }
+
+  public void parseCmdLine(String args[]) {
+    int i = 0;
+    String arg;
+    while (i < args.length && args[i].startsWith("-")) {
+      arg = args[i++];
+      //check options
+      if(arg.equals("-g")) {
+        if(i < args.length) {
+          this.geneLength = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-s")) {
+        if(i < args.length) {
+          this.segmentLength = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-n")) {
+        if(i < args.length) {
+          this.minNumSegment = new Integer(args[i++]).intValue();
+        }
+      } else if(arg.equals("-t")) {
+        if(i < args.length) {
+          this.numThread = new Integer(args[i++]).intValue();
+        }
+      }
+    }
+
+  }
+
+  public void run() {
+    Barrier.enterBarrier();
+    Sequencer.run(threadid, numThread, randomPtr, sequencerPtr);
+    Barrier.enterBarrier();
+  }
+
+  public static void main(String x[]){
+
+    System.out.print("Creating gene and segments... ");
+    Genome g = new Genome(x);
+
+    System.out.println("done.");
+    System.out.println("Gene length = " + g.genePtr.length);
+    System.out.println("Segment length = " + g.segmentsPtr.length);
+    System.out.println("Number segments = " + g.segmentsPtr.contentsPtr.size());
+    System.out.println("Number threads = " + g.numThread);
+
+
+    Barrier.setBarrier((int)g.numThread);
+
+    /* Create and Start Threads */
+
+    String gene = g.genePtr.contents;
+    Genome[] gn = new Genome[g.numThread];
+
+    for(int i = 1; i<g.numThread; i++) {
+      gn[i] = new Genome(i, g.geneLength, g.segmentLength, g.minNumSegment, g.numThread, g.randomPtr, g.genePtr, g.segmentsPtr, g.sequencerPtr);
+    }
+
+    System.out.print("Sequencing gene... ");
+
+    for(int i = 1; i<g.numThread; i++) {
+      gn[i].start();
+    }
+
+    Barrier.enterBarrier();
+    Sequencer.run(0, g.numThread, g.randomPtr, g.sequencerPtr);
+    Barrier.enterBarrier();
+
+
+    System.out.println("done.");
+
+    /* Check result */
+    {
+      String sequence = g.sequencerPtr.sequence;
+      boolean result = (gene.compareTo(sequence) == 0) ? true:false;
+      System.out.println("Sequence matches gene: " + (result ? "yes" : "no"));
+      //DEBUG
+      //if (result) {
+      //  System.out.println("gene = " + gene);
+      //  System.out.println("sequence = " + sequence);
+      //}
+    }
+
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java b/Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java
new file mode 100644
index 00000000..449e0e4a
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Hashtable.java
@@ -0,0 +1,69 @@
+/*
+ * Chained hash set of segment strings; each bucket is a List kept sorted by
+ * key. Sequencer uses it to drop duplicate segments.
+ */
+public class Hashtable {
+  List buckets[];
+  int numBucket;
+  int size;
+  int resizeRatio;
+  int growthFactor;
+
+
+  public Hashtable (int initNumBucket, int resizeRatio, int growthFactor) {
+
+    allocBuckets(initNumBucket);
+    numBucket = initNumBucket;
+    size = 0;
+    this.resizeRatio = ((resizeRatio < 0) ? 3 : resizeRatio); /* this.: the parameter shadows the field */
+    this.growthFactor = ((growthFactor < 0) ? 3 : growthFactor);
+  }
+
+  public boolean TMhashtable_insert (String keyPtr, String dataPtr) {
+    int i = hashSegment(keyPtr) % numBucket;
+
+    Pair findPair = new Pair();
+    findPair.firstPtr = keyPtr;
+    Pair pairPtr = buckets[(int)i].find(findPair);
+    if (pairPtr != null) {
+      return false;
+    }
+
+    Pair insertPtr = new Pair(keyPtr, dataPtr);
+
+    /* Add new entry */
+    if (buckets[(int)i].insert(insertPtr) == false) {
+      return false;
+    }
+
+    size++;
+
+    return true;
+  }
+
+  void allocBuckets (int numBucket) {
+    int i;
+    /* Allocate bucket: extra bucket is dummy for easier iterator code */
+    buckets = new List[numBucket+1];
+
+    for (i = 0; i < (numBucket + 1); i++) {
+      List chainPtr = new List();
+      buckets[(int)i] = chainPtr;
+    }
+  }
+
+  int hashSegment (String str) {
+    int hash = 0;
+
+    int index = 0;
+    /* Note: Do not change this hashing scheme */
+    for(index = 0; index < str.length(); index++) {
+      char c = str.charAt(index);
+      hash = c + (hash << 6) + (hash << 16) - hash;
+    }
+
+    if(hash < 0) hash *= -1;
+
+    return hash;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/List.java b/Robust/src/Benchmarks/SingleTM/Genome/List.java
new file mode 100644
index 00000000..68386a89
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/List.java
@@ -0,0 +1,70 @@
+/*
+ * Singly linked list of Pairs behind a dummy head node, kept in ascending
+ * order of Pair.firstPtr; duplicate keys are rejected by insert().
+ */
+public class List {
+  ListNode head;
+  int size;
+
+  public List () {
+    head = new ListNode();
+    head.dataPtr = null;
+    head.nextPtr = null;
+    size = 0;
+  }
+
+  Pair find (Pair dataPtr) {
+    ListNode nodePtr;
+    ListNode prevPtr = findPrevious(dataPtr);
+
+    nodePtr = prevPtr.nextPtr;
+
+    if ((nodePtr == null) || (compareSegment(nodePtr.dataPtr, dataPtr) != 0)) {
+      return null;
+    }
+
+    return (nodePtr.dataPtr);
+  }
+
+  ListNode findPrevious (Pair dataPtr) {
+    ListNode prevPtr = head;
+    ListNode nodePtr;
+    nodePtr = prevPtr.nextPtr;
+
+    for (; nodePtr != null; nodePtr = nodePtr.nextPtr) {
+      if (compareSegment(nodePtr.dataPtr, dataPtr) >= 0) {
+        return prevPtr;
+      }
+      prevPtr = nodePtr;
+    }
+
+    return prevPtr;
+  }
+
+  boolean insert (Pair dataPtr) {
+    ListNode prevPtr;
+    ListNode nodePtr;
+    ListNode currPtr;
+
+    prevPtr = findPrevious(dataPtr);
+    currPtr = prevPtr.nextPtr;
+
+    if ((currPtr != null) && (compareSegment((Pair)currPtr.dataPtr, (Pair)dataPtr) == 0)) {
+      return false;
+    }
+
+    nodePtr = new ListNode(dataPtr);
+
+    nodePtr.nextPtr = currPtr;
+    prevPtr.nextPtr = nodePtr;
+    size++;
+
+    return true;
+  }
+
+  int compareSegment (Pair a, Pair b) {
+    String aString = a.firstPtr;
+    String bString = b.firstPtr;
+    return aString.compareTo(bString);
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/ListNode.java b/Robust/src/Benchmarks/SingleTM/Genome/ListNode.java
new file mode 100644
index 00000000..ee48380b
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/ListNode.java
@@ -0,0 +1,15 @@
+/* Node of List: one Pair plus the link to the next node. */
+public class ListNode {
+  Pair dataPtr;
+  ListNode nextPtr;
+
+  public ListNode () {
+    dataPtr = null;
+    nextPtr = null;
+  }
+
+  public ListNode (Pair myDataPtr) {
+    dataPtr = myDataPtr;
+    nextPtr = null;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Pair.java b/Robust/src/Benchmarks/SingleTM/Genome/Pair.java
new file mode 100644
index 00000000..89886bdd
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Pair.java
@@ -0,0 +1,15 @@
+/* Two strings; List and Hashtable use firstPtr as the key. */
+public class Pair {
+  String firstPtr;
+  String secondPtr;
+
+  public Pair() {
+    firstPtr = null;
+    secondPtr = null;
+  }
+
+  public Pair(String myFirstPtr, String mySecondPtr) {
+    firstPtr = myFirstPtr;
+    secondPtr = mySecondPtr;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Segments.java b/Robust/src/Benchmarks/SingleTM/Genome/Segments.java
new file mode 100644
index 00000000..de1495b6
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Segments.java
@@ -0,0 +1,74 @@
+/*
+ * Segments cut from a Gene: each one is a substring of 'length' characters;
+ * create() collects them in contentsPtr.
+ */
+public class Segments {
+  public int length;
+  public int minNum;
+  Vector contentsPtr;
+  /* private: */
+  String strings[];
+
+  Segments (int myLength, int myMinNum) {
+    minNum = myMinNum;
+    length = myLength;
+
+    strings = new String[(int)minNum];
+    contentsPtr = new Vector((int)minNum);
+  }
+
+
+  /* =============================================================================
+   * segments_create
+   * -- Populates 'contentsPtr'
+   * =============================================================================
+   */
+  void create (Gene genePtr, Random randomPtr) {
+    String geneString;
+    int geneLength;
+    Bitmap startBitmapPtr;
+    int numStart;
+    int i;
+    int maxZeroRunLength;
+
+    geneString = genePtr.contents;
+    geneLength = genePtr.length;
+    startBitmapPtr = genePtr.startBitmapPtr;
+    numStart = geneLength - length + 1;
+
+    /* Pick some random segments to start */
+    for (i = 0; i < minNum; i++) {
+      int j = (int)(randomPtr.random_generate() % numStart);
+      boolean status = startBitmapPtr.set(j);
+      strings[i] = geneString.substring((int)j, (int)(j+length)); // WRITE SUBSTRING FUNCTION
+      contentsPtr.addElement(strings[i]);
+    }
+
+    /* Make sure segment covers start */
+    i = 0;
+    if (!startBitmapPtr.isSet(i)) {
+      String string;
+      string = geneString.subString((int)i, (int)(i+length)); // USE BYTE SUBSTRING FUNCTION
+      contentsPtr.addElement(string);
+      startBitmapPtr.set(i);
+    }
+
+    /* Add extra segments to fill holes and ensure overlap */
+    maxZeroRunLength = length - 1;
+    for (i = 0; i < numStart; i++) {
+      int i_stop = Math.imin((i+maxZeroRunLength), numStart);
+      for ( /* continue */; i < i_stop; i++) {
+        if (startBitmapPtr.isSet(i)) {
+          break;
+        }
+      }
+      if (i == i_stop) {
+        /* Found big enough hole */
+        i = i - 1;
+        String string = geneString.subString((int)i, (int)(i+length)); // USE BYTE SUBSTRING FUNCTION
+        contentsPtr.addElement(string);
+        startBitmapPtr.set(i);
+      }
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java b/Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java
new file mode 100644
index 00000000..65407344
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Sequencer.java
@@ -0,0 +1,405 @@
+/*
+ * Rebuilds the gene from its segments: run() de-duplicates the segments,
+ * hashes their prefixes/suffixes, links segments whose ends overlap (longest
+ * overlap first) and finally concatenates the chains into 'sequence'.
+ */
+public class Sequencer {
+
+  public String sequence;
+
+  public Segments segmentsPtr;
+
+  /* For removing duplicate segments */
+  Hashtable uniqueSegmentsPtr;
+
+  /* For matching segments */
+  endInfoEntry endInfoEntries[];
+  Table startHashToConstructEntryTables[];
+
+  /* For constructing sequence */
+  constructEntry constructEntries[];
+  Table hashToConstructEntryTable;
+
+  /* For deallocation */
+  int segmentLength;
+
+
+  /* =============================================================================
+   * sequencer_alloc
+   * -- Allocates the per-segment tables used by run()
+   * =============================================================================
+   */
+  public Sequencer (int myGeneLength, int mySegmentLength, Segments mySegmentsPtr) {
+
+    int maxNumUniqueSegment = myGeneLength - mySegmentLength + 1;
+    int i;
+
+    uniqueSegmentsPtr = new Hashtable((int)myGeneLength, -1, -1);
+
+    /* For finding a matching entry */
+    endInfoEntries = new endInfoEntry[maxNumUniqueSegment];
+    for (i = 0; i < maxNumUniqueSegment; i++) {
+      endInfoEntries[i] = new endInfoEntry(true, 1);
+    }
+
+    startHashToConstructEntryTables = new Table[mySegmentLength];
+    for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */
+      startHashToConstructEntryTables[i] = new Table(myGeneLength);
+    }
+    segmentLength = mySegmentLength;
+
+    /* For constructing sequence */
+    constructEntries = new constructEntry[maxNumUniqueSegment];
+
+    for (i= 0; i < maxNumUniqueSegment; i++) {
+      constructEntries[i] = new constructEntry(null, true, 0, null, null, null, 0, segmentLength);
+    }
+    hashToConstructEntryTable = new Table(myGeneLength);
+
+    segmentsPtr = mySegmentsPtr;
+  }
+
+
+  /* =============================================================================
+   * sequencer_run
+   * =============================================================================
+   */
+
+  public static void run (int threadNum, int numOfThreads, Random randomPtr, Sequencer sequencerPtr) {
+
+    int threadId = threadNum;
+
+    Segments segmentsPtr = sequencerPtr.segmentsPtr;
+
+    Hashtable uniqueSegmentsPtr = sequencerPtr.uniqueSegmentsPtr;
+    endInfoEntry endInfoEntries[] = sequencerPtr.endInfoEntries;
+    Table startHashToConstructEntryTables[] = sequencerPtr.startHashToConstructEntryTables;
+    constructEntry constructEntries[] = sequencerPtr.constructEntries;
+    Table hashToConstructEntryTable = sequencerPtr.hashToConstructEntryTable;
+
+    Vector segmentsContentsPtr = segmentsPtr.contentsPtr;
+    int numSegment = segmentsContentsPtr.size();
+    int segmentLength = segmentsPtr.length;
+
+    int i;
+    int j;
+    int i_start;
+    int i_stop;
+    int numUniqueSegment;
+    int substringLength;
+    int entryIndex;
+
+    int CHUNK_STEP1 = 12;
+
+    /*
+     * Step 1: Remove duplicate segments
+     */
+    int numThread = numOfThreads;
+    {
+      /* Choose disjoint segments [i_start,i_stop) for each thread */
+      int partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
+      i_start = threadId * partitionSize;
+      if (threadId == (numThread - 1)) {
+        i_stop = numSegment;
+      } else {
+        i_stop = i_start + partitionSize;
+      }
+    }
+
+    for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
+      atomic {
+        int ii;
+        int ii_stop = Math.imin(i_stop, (i+CHUNK_STEP1));
+        for (ii = i; ii < ii_stop; ii++) {
+          String segment = (String)segmentsContentsPtr.elementAt(ii);
+          if(!uniqueSegmentsPtr.TMhashtable_insert(segment, segment)) {
+            ;
+          }
+        } /* ii */
+      }
+    }
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 2a: Iterate over unique segments and compute hashes.
+     *
+     * For the gene "atcg", the hashes for the end would be:
+     *
+     * "t", "tc", and "tcg"
+     *
+     * And for the gene "tcgg", the hashes for the start would be:
+     *
+     * "t", "tc", and "tcg"
+     *
+     * The names are "end" and "start" because if a matching pair is found,
+     * they are the substring of the end part of the pair and the start
+     * part of the pair respectively. In the above example, "tcg" is the
+     * matching substring so:
+     *
+     * (end) (start)
+     * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg")
+     */
+
+    /* uniqueSegmentsPtr is constant now */
+    numUniqueSegment = uniqueSegmentsPtr.size;
+    entryIndex = 0;
+
+    {
+      /* Choose disjoint segments [i_start,i_stop) for each thread */
+      int num = uniqueSegmentsPtr.numBucket;
+      int partitionSize = (num + numThread/2) / numThread; /* with rounding */
+      i_start = threadId * partitionSize;
+      if (threadId == (numThread - 1)) {
+        i_stop = num;
+      } else {
+        i_stop = i_start + partitionSize;
+      }
+    }
+
+    {
+      /* Approximate disjoint segments of element allocation in constructEntries */
+      int partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+      entryIndex = threadId * partitionSize;
+    }
+
+    for (i = i_start; i < i_stop; i++) {
+      List chainPtr = uniqueSegmentsPtr.buckets[i];
+      ListNode it = chainPtr.head;
+
+      while(it.nextPtr != null) {
+        it = it.nextPtr;
+        String segment = it.dataPtr.firstPtr;
+        int newj;
+        int startHash;
+        boolean status;
+
+        /* Find an empty constructEntries entry */
+        atomic {
+          while(constructEntries[entryIndex].segment != null) {
+            entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
+          }
+          constructEntries[entryIndex].segment = segment;
+        }
+
+        constructEntry constructEntryPtr = constructEntries[entryIndex];
+
+        entryIndex = (entryIndex + 1) % numUniqueSegment;
+
+
+
+        /*
+         * Save hashes (sdbm algorithm) of segment substrings
+         *
+         * endHashes will be computed for shorter substrings after matches
+         * have been made (in the next phase of the code). This will reduce
+         * the number of substrings for which hashes need to be computed.
+         *
+         * Since we can compute startHashes incrementally, we go ahead
+         * and compute all of them here.
+         */
+        /* constructEntryPtr is local now */
+        constructEntryPtr.endHash = hashString(segment.substring(1)); // USE BYTE SUBSTRING FUNCTION
+
+        startHash = 0;
+        for (newj = 1; newj < segmentLength; newj++) {
+          startHash = segment.charAt((int)newj-1) + (startHash << 6) + (startHash << 16) - startHash;
+          atomic {
+            boolean check = startHashToConstructEntryTables[newj].table_insert(startHash, constructEntryPtr);
+          }
+
+        }
+
+
+        /*
+         * For looking up construct entries quickly
+         */
+        startHash = segment.charAt((int)newj-1) + (startHash << 6) + (startHash << 16) - startHash;
+        atomic {
+          hashToConstructEntryTable.table_insert(startHash, constructEntryPtr);
+        }
+      }
+    }
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 2b: Match ends to starts by using hash-based string comparison.
+     */
+    for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {
+
+      Table startHashToConstructEntryTablePtr = startHashToConstructEntryTables[substringLength];
+      LinkedList buckets[] = startHashToConstructEntryTablePtr.buckets;
+      int numBucket = startHashToConstructEntryTablePtr.numBucket;
+
+      int index_start;
+      int index_stop;
+
+      {
+        /* Choose disjoint segments [index_start,index_stop) for each thread */
+        int partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+        index_start = threadId * partitionSize;
+        if (threadId == (numThread - 1)) {
+          index_stop = numUniqueSegment;
+        } else {
+          index_stop = index_start + partitionSize;
+        }
+      }
+
+      /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */
+      for (entryIndex = index_start;
+           entryIndex < index_stop;
+           entryIndex += endInfoEntries[entryIndex].jumpToNext)
+      {
+        if (!endInfoEntries[entryIndex].isEnd) {
+          continue;
+        }
+
+        /* ConstructEntries[entryIndex] is local data */
+        constructEntry endConstructEntryPtr = constructEntries[entryIndex];
+        String endSegment = endConstructEntryPtr.segment;
+        int endHash = endConstructEntryPtr.endHash;
+
+        LinkedList chainPtr = buckets[(endHash % numBucket)]; /* buckets: constant data */
+        LinkedListIterator it = (LinkedListIterator)chainPtr.iterator();
+        while (it.hasNext()) {
+          constructEntry startConstructEntryPtr = (constructEntry)it.next();
+          String startSegment = startConstructEntryPtr.segment;
+          int newLength = 0;
+
+          /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
+          atomic {
+            if(startConstructEntryPtr.isStart &&
+               (endConstructEntryPtr.startPtr != startConstructEntryPtr) &&
+               (startSegment.substring(0, (int)substringLength).compareTo(endSegment.substring((int)(segmentLength-substringLength))) == 0))
+            {
+              startConstructEntryPtr.isStart = false;
+              constructEntry startConstructEntry_endPtr;
+              constructEntry endConstructEntry_startPtr;
+
+              /* Update endInfo (appended something so no inter end) */
+              endInfoEntries[entryIndex].isEnd = false;
+              /* Update segment chain construct info */
+              startConstructEntry_endPtr = startConstructEntryPtr.endPtr;
+              endConstructEntry_startPtr = endConstructEntryPtr.startPtr;
+              startConstructEntry_endPtr.startPtr = endConstructEntry_startPtr;
+              endConstructEntryPtr.nextPtr = startConstructEntryPtr;
+              endConstructEntry_startPtr.endPtr = startConstructEntry_endPtr;
+              endConstructEntryPtr.overlap = substringLength;
+              newLength = endConstructEntry_startPtr.length + startConstructEntryPtr.length - substringLength;
+              endConstructEntry_startPtr.length = newLength;
+            } else {/* if (matched) */
+            }
+          }
+
+          if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */
+            break;
+          }
+        } /* iterate over chain */
+
+      } /* for (endIndex < numUniqueSegment) */
+
+      Barrier.enterBarrier();
+
+      /*
+       * Step 2c: Update jump values and hashes
+       *
+       * endHash entries of all remaining ends are updated to the next
+       * substringLength. Additionally jumpToNext entries are updated such
+       * that they allow to skip non-end entries. Currently this is sequential
+       * because parallelization did not perform better.
+       */
+
+      if (threadId == 0) {
+        if (substringLength > 1) {
+          int index = segmentLength - substringLength + 1;
+          /* initialization of j and i: with i being the next end after j=0 */
+          for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) {
+            /* find first non-null */
+            ;
+          }
+          /* entry 0 is handled separately from the loop below */
+          endInfoEntries[0].jumpToNext = i;
+          if (endInfoEntries[0].isEnd) {
+            String segment = constructEntries[0].segment;
+            constructEntries[0].endHash = hashString(segment.subString((int)index)); // USE BYTE SUBSTRING FUNCTION
+          }
+          /* Continue scanning (do not reset i) */
+          for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
+
+            if (endInfoEntries[i].isEnd) {
+              String segment = constructEntries[i].segment;
+              constructEntries[i].endHash = hashString(segment.substring((int)index)); // USE BYTE SUBSTRING FUNCTION
+              endInfoEntries[j].jumpToNext = Math.imax((int)1, (int)(i - j));
+              j = i;
+            }
+          }
+          endInfoEntries[j].jumpToNext = i - j;
+        }
+      }
+
+      Barrier.enterBarrier();
+
+    } /* for (substringLength > 0) */
+
+    Barrier.enterBarrier();
+
+    /*
+     * Step 3: Build sequence string
+     */
+    if (threadId == 0) {
+      int totalLength = 0;
+      for (i = 0; i < numUniqueSegment; i++) {
+        if (constructEntries[i].isStart) {
+          totalLength += constructEntries[i].length;
+        }
+      }
+
+      String sequence = sequencerPtr.sequence;
+
+      String copyPtr = sequence;
+      int sequenceLength = 0;
+
+      for (i = 0; i < numUniqueSegment; i++) {
+        /* If there are several start segments, we append in arbitrary order */
+        constructEntry constructEntryPtr = constructEntries[i];
+        if (constructEntryPtr.isStart) {
+          int newSequenceLength = sequenceLength + constructEntryPtr.length;
+          int prevOverlap = 0;
+          do {
+            int numChar = segmentLength - constructEntryPtr.overlap;
+            copyPtr = constructEntryPtr.segment;
+            if(sequencerPtr.sequence == null) {
+              sequencerPtr.sequence = copyPtr;
+            } else {
+              sequencerPtr.sequence = sequencerPtr.sequence.concat(copyPtr.substring((int)(prevOverlap)));
+            }
+            prevOverlap = constructEntryPtr.overlap;
+            constructEntryPtr = constructEntryPtr.nextPtr;
+          } while (constructEntryPtr != null);
+        }
+      }
+    }
+  }
+
+  /* =============================================================================
+   * hashString
+   * -- uses sdbm hash function
+   * =============================================================================
+   */
+  static int hashString (String str)
+  {
+    int hash = 0;
+
+    int index = 0;
+    // Note: Do not change this hashing scheme
+    for(index = 0; index < str.length(); index++) {
+      char c = str.charAt(index);
+      hash = c + (hash << 6) + (hash << 16) - hash;
+    }
+
+    if(hash < 0) hash *= -1;
+
+    return hash;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/Table.java b/Robust/src/Benchmarks/SingleTM/Genome/Table.java
new file mode 100644
index 00000000..653728f7
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/Table.java
@@ -0,0 +1,63 @@
+/*
+ * Fixed-size hash table of LinkedList buckets; the caller supplies the hash
+ * and the bucket index is |hash % numBucket|.
+ */
+public class Table {
+
+  LinkedList buckets[];
+  int numBucket;
+
+
+  /* =============================================================================
+   * table_alloc
+   * -- Creates myNumBucket empty buckets
+   * =============================================================================
+   */
+  Table (int myNumBucket) {
+
+    int i;
+
+    buckets = new LinkedList[myNumBucket];
+    for(i = 0; i < myNumBucket; i++) {
+      buckets[i] = new LinkedList();
+    }
+
+    numBucket = myNumBucket;
+
+  }
+
+
+  /* =============================================================================
+   * table_insert
+   * -- Returns TRUE if successful, else FALSE
+   * =============================================================================
+   */
+  boolean table_insert (int hash, Object dataPtr) {
+    int i = (int)(hash % numBucket);
+    if(i < 0) i *= -1;
+    if(buckets[i].contains(dataPtr)) {
+      return false;
+    }
+    buckets[i].add(dataPtr);
+    return true;
+  }
+
+  /* =============================================================================
+   * table_remove
+   * -- Returns TRUE if successful, else FALSE
+   * =============================================================================
+   */
+  boolean table_remove (int hash, Object dataPtr) {
+    /* Same bucket mapping as table_insert: hash % numBucket may be negative */
+    int i = (int)(hash % numBucket);
+    if(i < 0) i *= -1;
+    boolean tempbool = buckets[i].contains(dataPtr);
+    if (tempbool) {
+      buckets[i].remove(dataPtr);
+      return true;
+    }
+
+    return false;
+  }
+
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java b/Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java
new file mode 100644
index 00000000..e0c17ecb
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/constructEntry.java
@@ -0,0 +1,30 @@
+/*
+ * Per-segment bookkeeping used while chaining segments. startPtr/endPtr point
+ * at the first/last entry of the chain this entry belongs to; nextPtr is the
+ * segment appended after this one and overlap is how many characters they share.
+ */
+public class constructEntry {
+  boolean isStart;
+  String segment;
+  int endHash;
+  constructEntry startPtr;
+  constructEntry nextPtr;
+  constructEntry endPtr;
+  int overlap;
+  int length;
+
+  constructEntry(String mySegment, boolean myStart, int myEndHash, constructEntry myStartPtr, constructEntry myNextPtr, constructEntry myEndPtr, int myOverlap, int myLength) {
+    segment = mySegment;
+    isStart = myStart;
+    endHash = myEndHash;
+    startPtr = this; /* a new entry is a chain of one; myStartPtr is not used */
+    nextPtr = myNextPtr;
+    endPtr = this; /* likewise myEndPtr is not used */
+    overlap = myOverlap;
+    length = myLength;
+  }
+
+  boolean equals(constructEntry copy) {
+    return ((segment.compareTo(copy.segment) == 0) && (isStart == copy.isStart) && (endHash == copy.endHash) && (startPtr == copy.startPtr) && (nextPtr == copy.nextPtr) && (endPtr == copy.endPtr) && (overlap == copy.overlap) && (length == copy.length));
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java b/Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java
new file mode 100644
index 00000000..3827a0e4
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/endInfoEntry.java
@@ -0,0 +1,17 @@
+/*
+ * Per-segment flag/skip pair: isEnd says the segment is still the tail of its
+ * chain, jumpToNext is the index step Sequencer uses to reach the next entry.
+ */
+public class endInfoEntry {
+  boolean isEnd;
+  int jumpToNext;
+
+  public endInfoEntry() {
+    isEnd = false;
+    jumpToNext = 0;
+  }
+  public endInfoEntry(boolean myEnd, int myNext) {
+    isEnd = myEnd;
+    jumpToNext = myNext;
+  }
+}
diff --git a/Robust/src/Benchmarks/SingleTM/Genome/makefile b/Robust/src/Benchmarks/SingleTM/Genome/makefile
new file mode 100644
index 00000000..8f1f2e0f
--- /dev/null
+++ b/Robust/src/Benchmarks/SingleTM/Genome/makefile
@@ -0,0 +1,20 @@
+MAINCLASS=Genome
+SRC=${MAINCLASS}.java \
+	../common/Random.java \
+	Bitmap.java \
+	Gene.java \
+	Segments.java \
+	endInfoEntry.java \
+	constructEntry.java \
+	../../../ClassLibrary/JavaSTM/Barrier.java \
+	Sequencer.java \
+	Table.java \
+	Hashtable.java
+FLAGS=-mainclass ${MAINCLASS} -singleTM -optimize -dcopts -abcclose -fastmemcpy -joptimize
+
+default:
+	../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
+
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin