From: afedward Date: Wed, 29 Apr 2009 00:05:01 +0000 (+0000) Subject: Updating CVS with partially ported benchmark. Sequencer.java's run() function is... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a7c4105a08f1c57278a059d449ef213eddfefc5f;p=IRC.git Updating CVS with partially ported benchmark. Sequencer.java's run() function is the last function to be ported, then compilation and testing. --- diff --git a/Robust/src/Benchmarks/SingleTM/genome/Defines.common.mk b/Robust/src/Benchmarks/SingleTM/genome/Defines.common.mk new file mode 100644 index 00000000..d8a356b9 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/Defines.common.mk @@ -0,0 +1,36 @@ +# ============================================================================== +# +# Defines.common.mk +# +# ============================================================================== + + +CFLAGS += -DLIST_NO_DUPLICATES +CFLAGS += -DCHUNK_STEP1=12 + +PROG := genome + +SRCS += \ + gene.c \ + genome.c \ + segments.c \ + sequencer.c \ + table.c \ + $(LIB)/bitmap.c \ + $(LIB)/hash.c \ + $(LIB)/hashtable.c \ + $(LIB)/pair.c \ + $(LIB)/random.c \ + $(LIB)/list.c \ + $(LIB)/mt19937ar.c \ + $(LIB)/thread.c \ + $(LIB)/vector.c \ +# +OBJS := ${SRCS:.c=.o} + + +# ============================================================================== +# +# End of Defines.common.mk +# +# ============================================================================== diff --git a/Robust/src/Benchmarks/SingleTM/genome/Makefile.seq b/Robust/src/Benchmarks/SingleTM/genome/Makefile.seq new file mode 100644 index 00000000..e143931e --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/Makefile.seq @@ -0,0 +1,18 @@ +# ============================================================================== +# +# Makefile.seq +# +# ============================================================================== + + +include ../common/Defines.common.mk +include ./Defines.common.mk +include ../common/Makefile.seq + + +# 
============================================================================== +# +# Makefile.seq +# +# ============================================================================== + diff --git a/Robust/src/Benchmarks/SingleTM/genome/Makefile.stm b/Robust/src/Benchmarks/SingleTM/genome/Makefile.stm new file mode 100644 index 00000000..5206ffff --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/Makefile.stm @@ -0,0 +1,17 @@ +# ============================================================================== +# +# Makefile.stm +# +# ============================================================================== + + +include ../common/Defines.common.mk +include ./Defines.common.mk +include ../common/Makefile.stm + + +# ============================================================================== +# +# End of Makefile.stm +# +# ============================================================================== diff --git a/Robust/src/Benchmarks/SingleTM/genome/Makefile.stm.otm b/Robust/src/Benchmarks/SingleTM/genome/Makefile.stm.otm new file mode 100644 index 00000000..7dc12e57 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/Makefile.stm.otm @@ -0,0 +1,17 @@ +# ============================================================================== +# +# Makefile.stm.otm +# +# ============================================================================== + + +include ../common/Defines.common.otm.mk +include ./Defines.common.mk +include ../common/Makefile.stm.otm + + +# ============================================================================== +# +# End of Makefile.stm.otm +# +# ============================================================================== diff --git a/Robust/src/Benchmarks/SingleTM/genome/README b/Robust/src/Benchmarks/SingleTM/genome/README new file mode 100644 index 00000000..8960be62 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/README @@ -0,0 +1,95 @@ +Introduction +------------ + +This benchmark implements a gene sequencing 
program that reconstructs the gene +sequence from segments of a larger gene. + +For example, given the segments TCGG, GCAG, ATCG, CAGC, and GATC, the program +will try to construct the shortest gene that can be made from them. + +For example, if we slide around the above segments we can get: + + TCGG + GCAG + ATCG + CAGC + GATC + ============= + CAGCAGATCGG + + +This gives a final sequence of length 11. Another possible solution is: + + TCGG + GCAG + ATCG + CAGC + GATC + ============= + GATCGGCAGC + +This solution has length 10. Both are consistent with the segments provided, +but the second is the optimal solution since it is shorter. + +The algorithm used to sequence the gene has three phases: + + 1) Remove duplicate segments by using hash-set + 2) Match segments using Rabin-Karp string search algorithm [3] + - Cycles are prevented by tracking starts/ends of matched chains + 3) Build sequence + +The first two steps make up the bulk of the execution time and are parallelized. + + +Compiling and Running +--------------------- + +To build the application, simply run: + + make -f + +in the source directory. For example, for the sequential flavor, run: + + make -f Makefile.seq + +By default, this produces an executable named "genome", which can then be +run in the following manner: + + ./genome -g \ + -s \ + -n \ + -t + +To produce the data in [1] and [2], the following values were used: + + -g256 -s16 -n16384 + +For running without a simulator, use the default values: + + -g16384 -s64 -n16777216 + + +Workload Size +------------- + +The size of the workload is determined by the -g, -s, and -n options. The +gene sequencing example in "Introduction", would correspond to -g10 -s4 -n5. +In general, the values for these three options should follow the following +relationship: -s << -g << -n. Larger values increase the size of the workload. + + +References +---------- + +[1] C. Cao Minh, J. Chung, C. Kozyrakis, and K. Olukotun. 
STAMP: Stanford + Transactional Applications for Multi-processing. In IISWC '08: Proceedings + of The IEEE International Symposium on Workload Characterization, + September 2008. + +[2] C. Cao Minh, M. Trautmann, J. Chung, A. McDonald, N. Bronson, J. Casper, + C. Kozyrakis, and K. Olukotun. An Effective Hybrid Transactional Memory + System with Strong Isolation Guarantees. In Proceedings of the 34th Annual + International Symposium on Computer Architecture, 2007. + +[3] R. M. Karp and M. O. Rabin. Efficient randomized pattern-matching + algorithms. IBM Journal of Research and Development, 1987. diff --git a/Robust/src/Benchmarks/SingleTM/genome/gene.c b/Robust/src/Benchmarks/SingleTM/genome/gene.c new file mode 100644 index 00000000..60e0e9d4 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/gene.c @@ -0,0 +1,228 @@ +/* ============================================================================= + * + * gene.c + * -- Create random gene + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. 
+ * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ============================================================================= + */ + + +#include +#include +#include "gene.h" +#include "nucleotide.h" +#include "random.h" +#include "tm.h" + + +/* ============================================================================= + * gene_alloc + * -- Does all memory allocation necessary for gene creation + * -- Returns NULL on failure + * ============================================================================= + */ +gene_t* +gene_alloc (long length) +{ + gene_t* genePtr; + + assert(length > 1); + + genePtr = (gene_t*)malloc(sizeof(gene_t)); + if (genePtr == NULL) { + return NULL; + } + + genePtr->contents = (char*)malloc((length + 1) * sizeof(char)); + if (genePtr->contents == NULL) { + return NULL; + } + genePtr->contents[length] = '\0'; + genePtr->length = length; + + genePtr->startBitmapPtr = bitmap_alloc(length); + if (genePtr->startBitmapPtr == NULL) { + return NULL; + } + + return genePtr; +} + + +/* ============================================================================= + * gene_create + * -- Populate contents with random gene + * ============================================================================= + */ +void +gene_create (gene_t* genePtr, random_t* randomPtr) +{ + long length; + char* contents; + long i; + const char nucleotides[] = { + NUCLEOTIDE_ADENINE, + NUCLEOTIDE_CYTOSINE, + NUCLEOTIDE_GUANINE, + NUCLEOTIDE_THYMINE, + }; + + assert(genePtr != NULL); + assert(randomPtr != NULL); + + length = genePtr->length; + contents = genePtr->contents; + + for (i = 0; i < length; i++) { + contents[i] = + nucleotides[(random_generate(randomPtr)% NUCLEOTIDE_NUM_TYPE)]; + } +} + + +/* ============================================================================= + * gene_free + * ============================================================================= + */ +void +gene_free (gene_t* genePtr) +{ + bitmap_free(genePtr->startBitmapPtr); + free(genePtr->contents); + free(genePtr); +} + + +/* 
============================================================================= + * TEST_GENE + * ============================================================================= + */ +#ifdef TEST_GENE + + +#include +#include +#include + + +int +main () +{ + gene_t* gene1Ptr; + gene_t* gene2Ptr; + gene_t* gene3Ptr; + random_t* randomPtr; + + bool_t status = memory_init(1, 4, 2); + assert(status); + + puts("Starting..."); + + gene1Ptr = gene_alloc(10); + gene2Ptr = gene_alloc(10); + gene3Ptr = gene_alloc(9); + randomPtr = random_alloc(); + + random_seed(randomPtr, 0); + gene_create(gene1Ptr, randomPtr); + random_seed(randomPtr, 1); + gene_create(gene2Ptr, randomPtr); + random_seed(randomPtr, 0); + gene_create(gene3Ptr, randomPtr); + + assert(gene1Ptr->length == strlen(gene1Ptr->contents)); + assert(gene2Ptr->length == strlen(gene2Ptr->contents)); + assert(gene3Ptr->length == strlen(gene3Ptr->contents)); + + assert(gene1Ptr->length == gene2Ptr->length); + assert(strcmp(gene1Ptr->contents, gene2Ptr->contents) != 0); + + assert(gene1Ptr->length == (gene3Ptr->length + 1)); + assert(strcmp(gene1Ptr->contents, gene3Ptr->contents) != 0); + assert(strncmp(gene1Ptr->contents, + gene3Ptr->contents, + gene3Ptr->length) == 0); + + gene_free(gene1Ptr); + gene_free(gene2Ptr); + gene_free(gene3Ptr); + random_free(randomPtr); + + puts("All tests passed."); + + return 0; +} + + +#endif /* TEST_GENE */ + + +/* ============================================================================= + * + * End of gene.c + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/gene.h b/Robust/src/Benchmarks/SingleTM/genome/gene.h new file mode 100644 index 00000000..b4fa2db1 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/gene.h @@ -0,0 +1,123 @@ +/* ============================================================================= + * + * gene.h + * -- Create random gene + * + * 
============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + + +#ifndef GENE_H +#define GENE_H 1 + + +#include "bitmap.h" +#include "random.h" + + +typedef struct gene { + long length; + char* contents; + bitmap_t* startBitmapPtr; /* used for creating segments */ +} gene_t; + + +/* ============================================================================= + * gene_alloc + * -- Does all memory allocation necessary for gene creation + * -- Returns NULL on failure + * ============================================================================= + */ +gene_t* +gene_alloc (long length); + + +/* ============================================================================= + * gene_create + * -- Populate contents with random gene + * ============================================================================= + */ +void +gene_create (gene_t* genePtr, random_t* randomPtr); + + +/* ============================================================================= + * gene_free + * ============================================================================= + */ +void +gene_free (gene_t* genePtr); + + +#endif /* GENE_H */ + + +/* 
============================================================================= + * + * End of gene.h + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/gene.o b/Robust/src/Benchmarks/SingleTM/genome/gene.o new file mode 100644 index 00000000..c4b94f87 Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/gene.o differ diff --git a/Robust/src/Benchmarks/SingleTM/genome/genome b/Robust/src/Benchmarks/SingleTM/genome/genome new file mode 100755 index 00000000..8f88639c Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/genome differ diff --git a/Robust/src/Benchmarks/SingleTM/genome/genome.c b/Robust/src/Benchmarks/SingleTM/genome/genome.c new file mode 100644 index 00000000..8a1ca271 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/genome.c @@ -0,0 +1,285 @@ +/* ============================================================================= + * + * genome.c + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. 
+ * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ============================================================================= + */ + + +#include +#include +#include +#include +#include +#include "gene.h" +#include "random.h" +#include "segments.h" +#include "sequencer.h" +#include "thread.h" +#include "timer.h" +#include "tm.h" +#include "vector.h" + + +enum param_types { + PARAM_GENE = (unsigned char)'g', + PARAM_NUMBER = (unsigned char)'n', + PARAM_SEGMENT = (unsigned char)'s', + PARAM_THREAD = (unsigned char)'t', +}; + + +#define PARAM_DEFAULT_GENE (1L << 14) +#define PARAM_DEFAULT_NUMBER (1L << 22) +#define PARAM_DEFAULT_SEGMENT (1L << 6) +#define PARAM_DEFAULT_THREAD (1L) + + +long global_params[256]; /* 256 = ascii limit */ + + +/* ============================================================================= + * displayUsage + * ============================================================================= + */ +static void +displayUsage (const char* appName) +{ + printf("Usage: %s [options]\n", appName); + puts("\nOptions: (defaults)\n"); + printf(" g Length of [g]ene (%li)\n", PARAM_DEFAULT_GENE); + printf(" n Min [n]umber of segments (%li)\n", PARAM_DEFAULT_NUMBER); + printf(" s Length of [s]egment (%li)\n", PARAM_DEFAULT_SEGMENT); + printf(" t Number of [t]hreads (%li)\n", PARAM_DEFAULT_THREAD); + puts(""); + puts("The actual number of segments created may be greater than -n"); + puts("in order to completely cover the gene."); + exit(1); +} + + +/* ============================================================================= + * setDefaultParams + * ============================================================================= + */ +static void +setDefaultParams( void ) +{ + global_params[PARAM_GENE] = PARAM_DEFAULT_GENE; + global_params[PARAM_NUMBER] = PARAM_DEFAULT_NUMBER; + global_params[PARAM_SEGMENT] = PARAM_DEFAULT_SEGMENT; + global_params[PARAM_THREAD] = PARAM_DEFAULT_THREAD; +} + + +/* ============================================================================= + * parseArgs + * 
============================================================================= + */ +static void +parseArgs (long argc, char* const argv[]) +{ + long i; + long opt; + + opterr = 0; + + setDefaultParams(); + + while ((opt = getopt(argc, argv, "g:n:s:t:")) != -1) { + switch (opt) { + case 'g': + case 'n': + case 's': + case 't': + global_params[(unsigned char)opt] = atol(optarg); + break; + case '?': + default: + opterr++; + break; + } + } + + for (i = optind; i < argc; i++) { + fprintf(stderr, "Non-option argument: %s\n", argv[i]); + opterr++; + } + + if (opterr) { + displayUsage(argv[0]); + } +} + + +/* ============================================================================= + * main + * ============================================================================= + */ +MAIN (argc,argv) +{ + TIMER_T start; + TIMER_T stop; + + GOTO_REAL(); + + /* Initialization */ + parseArgs(argc, (char** const)argv); + SIM_GET_NUM_CPU(global_params[PARAM_THREAD]); + + printf("Creating gene and segments... 
"); + fflush(stdout); + + long geneLength = global_params[PARAM_GENE]; + long segmentLength = global_params[PARAM_SEGMENT]; + long minNumSegment = global_params[PARAM_NUMBER]; + long numThread = global_params[PARAM_THREAD]; + + TM_STARTUP(numThread); + P_MEMORY_STARTUP(numThread); + thread_startup(numThread); + + random_t* randomPtr = random_alloc(); + assert(randomPtr != NULL); + random_seed(randomPtr, 0); + + gene_t* genePtr = gene_alloc(geneLength); + assert( genePtr != NULL); + gene_create(genePtr, randomPtr); + char* gene = genePtr->contents; + + segments_t* segmentsPtr = segments_alloc(segmentLength, minNumSegment); + assert(segmentsPtr != NULL); + segments_create(segmentsPtr, genePtr, randomPtr); + sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr); + assert(sequencerPtr != NULL); + + puts("done."); + printf("Gene length = %li\n", genePtr->length); + printf("Segment length = %li\n", segmentsPtr->length); + printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr)); + fflush(stdout); + + /* Benchmark */ + printf("Sequencing gene... "); + fflush(stdout); + TIMER_READ(start); + GOTO_SIM(); +#ifdef OTM +#pragma omp parallel + { + sequencer_run(sequencerPtr); + } +#else + thread_start(sequencer_run, (void*)sequencerPtr); +#endif + GOTO_REAL(); + TIMER_READ(stop); + puts("done."); + printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop)); + fflush(stdout); + + /* Check result */ + { + char* sequence = sequencerPtr->sequence; + int result = strcmp(gene, sequence); + printf("Sequence matches gene: %s\n", (result ? "no" : "yes")); + if (result) { + printf("gene = %s\n", gene); + printf("sequence = %s\n", sequence); + } + fflush(stdout); + assert(strlen(sequence) >= strlen(gene)); + } + + /* Clean up */ + printf("Deallocating memory... 
"); + fflush(stdout); + sequencer_free(sequencerPtr); + segments_free(segmentsPtr); + gene_free(genePtr); + random_free(randomPtr); + puts("done."); + fflush(stdout); + + TM_SHUTDOWN(); + P_MEMORY_SHUTDOWN(); + + GOTO_SIM(); + + thread_shutdown(); + + MAIN_RETURN(0); +} + + + +/* ============================================================================= + * + * End of genome.c + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/genome.c~ b/Robust/src/Benchmarks/SingleTM/genome/genome.c~ new file mode 100644 index 00000000..8a1ca271 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/genome.c~ @@ -0,0 +1,285 @@ +/* ============================================================================= + * + * genome.c + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. 
+ * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ============================================================================= + */ + + +#include +#include +#include +#include +#include +#include "gene.h" +#include "random.h" +#include "segments.h" +#include "sequencer.h" +#include "thread.h" +#include "timer.h" +#include "tm.h" +#include "vector.h" + + +enum param_types { + PARAM_GENE = (unsigned char)'g', + PARAM_NUMBER = (unsigned char)'n', + PARAM_SEGMENT = (unsigned char)'s', + PARAM_THREAD = (unsigned char)'t', +}; + + +#define PARAM_DEFAULT_GENE (1L << 14) +#define PARAM_DEFAULT_NUMBER (1L << 22) +#define PARAM_DEFAULT_SEGMENT (1L << 6) +#define PARAM_DEFAULT_THREAD (1L) + + +long global_params[256]; /* 256 = ascii limit */ + + +/* ============================================================================= + * displayUsage + * ============================================================================= + */ +static void +displayUsage (const char* appName) +{ + printf("Usage: %s [options]\n", appName); + puts("\nOptions: (defaults)\n"); + printf(" g Length of [g]ene (%li)\n", PARAM_DEFAULT_GENE); + printf(" n Min [n]umber of segments (%li)\n", PARAM_DEFAULT_NUMBER); + printf(" s Length of [s]egment (%li)\n", PARAM_DEFAULT_SEGMENT); + printf(" t Number of [t]hreads (%li)\n", PARAM_DEFAULT_THREAD); + puts(""); + puts("The actual number of segments created may be greater than -n"); + puts("in order to completely cover the gene."); + exit(1); +} + + +/* ============================================================================= + * setDefaultParams + * ============================================================================= + */ +static void +setDefaultParams( void ) +{ + global_params[PARAM_GENE] = PARAM_DEFAULT_GENE; + global_params[PARAM_NUMBER] = PARAM_DEFAULT_NUMBER; + global_params[PARAM_SEGMENT] = PARAM_DEFAULT_SEGMENT; + global_params[PARAM_THREAD] = PARAM_DEFAULT_THREAD; +} + + +/* ============================================================================= + * parseArgs + * 
============================================================================= + */ +static void +parseArgs (long argc, char* const argv[]) +{ + long i; + long opt; + + opterr = 0; + + setDefaultParams(); + + while ((opt = getopt(argc, argv, "g:n:s:t:")) != -1) { + switch (opt) { + case 'g': + case 'n': + case 's': + case 't': + global_params[(unsigned char)opt] = atol(optarg); + break; + case '?': + default: + opterr++; + break; + } + } + + for (i = optind; i < argc; i++) { + fprintf(stderr, "Non-option argument: %s\n", argv[i]); + opterr++; + } + + if (opterr) { + displayUsage(argv[0]); + } +} + + +/* ============================================================================= + * main + * ============================================================================= + */ +MAIN (argc,argv) +{ + TIMER_T start; + TIMER_T stop; + + GOTO_REAL(); + + /* Initialization */ + parseArgs(argc, (char** const)argv); + SIM_GET_NUM_CPU(global_params[PARAM_THREAD]); + + printf("Creating gene and segments... 
"); + fflush(stdout); + + long geneLength = global_params[PARAM_GENE]; + long segmentLength = global_params[PARAM_SEGMENT]; + long minNumSegment = global_params[PARAM_NUMBER]; + long numThread = global_params[PARAM_THREAD]; + + TM_STARTUP(numThread); + P_MEMORY_STARTUP(numThread); + thread_startup(numThread); + + random_t* randomPtr = random_alloc(); + assert(randomPtr != NULL); + random_seed(randomPtr, 0); + + gene_t* genePtr = gene_alloc(geneLength); + assert( genePtr != NULL); + gene_create(genePtr, randomPtr); + char* gene = genePtr->contents; + + segments_t* segmentsPtr = segments_alloc(segmentLength, minNumSegment); + assert(segmentsPtr != NULL); + segments_create(segmentsPtr, genePtr, randomPtr); + sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr); + assert(sequencerPtr != NULL); + + puts("done."); + printf("Gene length = %li\n", genePtr->length); + printf("Segment length = %li\n", segmentsPtr->length); + printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr)); + fflush(stdout); + + /* Benchmark */ + printf("Sequencing gene... "); + fflush(stdout); + TIMER_READ(start); + GOTO_SIM(); +#ifdef OTM +#pragma omp parallel + { + sequencer_run(sequencerPtr); + } +#else + thread_start(sequencer_run, (void*)sequencerPtr); +#endif + GOTO_REAL(); + TIMER_READ(stop); + puts("done."); + printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop)); + fflush(stdout); + + /* Check result */ + { + char* sequence = sequencerPtr->sequence; + int result = strcmp(gene, sequence); + printf("Sequence matches gene: %s\n", (result ? "no" : "yes")); + if (result) { + printf("gene = %s\n", gene); + printf("sequence = %s\n", sequence); + } + fflush(stdout); + assert(strlen(sequence) >= strlen(gene)); + } + + /* Clean up */ + printf("Deallocating memory... 
"); + fflush(stdout); + sequencer_free(sequencerPtr); + segments_free(segmentsPtr); + gene_free(genePtr); + random_free(randomPtr); + puts("done."); + fflush(stdout); + + TM_SHUTDOWN(); + P_MEMORY_SHUTDOWN(); + + GOTO_SIM(); + + thread_shutdown(); + + MAIN_RETURN(0); +} + + + +/* ============================================================================= + * + * End of genome.c + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/genome.o b/Robust/src/Benchmarks/SingleTM/genome/genome.o new file mode 100644 index 00000000..1566a82f Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/genome.o differ diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Bitmap.java b/Robust/src/Benchmarks/SingleTM/genome/java/Bitmap.java new file mode 100644 index 00000000..0e437075 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Bitmap.java @@ -0,0 +1,212 @@ +public class Bitmap { + public long numBit; + public long numWord; + public long bits[]; + + private static NUM_BIT_PER_BYTE = 8; + private static NUM_BIT_PER_WORD = (8) * NUM_BIT_PER_BYTE) + + + /* ============================================================================= + * bitmap_alloc + * -- Returns NULL on failure + * ============================================================================= + */ + Bitmap(long myNumBit) { + + numBit = myNumBit; + numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD); + + bits = new long[numWord]; + + int i = 0; + for(i = 0; i < numWord; i++) { + bits[i] = 0; + } + } + + Bitmap(Bitmap myBitMap) { + numBit = myBitMap.numBit; + numWord = myBitMap.numWord; + bits = new long[numWord]; + int i = 0; + for(i = 0; i < numWord; i++) { + bits[i] = myBitMap.bits[i]; + } + } + + /* ============================================================================= + * Pbitmap_alloc + * -- Returns NULL on failure + * 
============================================================================= + */ + //bitmap_t* Pbitmap_alloc (long numBit) { } + + + /* ============================================================================= + * bitmap_free + * ============================================================================= + */ + //void bitmap_free (bitmap_t* bitmapPtr); + + + /* ============================================================================= + * Pbitmap_free + * ============================================================================= + */ + //void Pbitmap_free (bitmap_t* bitmapPtr); + + + /* ============================================================================= + * bitmap_set + * -- Sets ith bit to 1 + * -- Returns TRUE on success, else FALSE + * ============================================================================= + */ + boolean set (long i) { + if ((i < 0) || (i >= numBit)) { + return FALSE; + } + + bits[i/NUM_BIT_PER_WORD] |= (1 << (i % NUM_BIT_PER_WORD)); + + return TRUE; + } + + + /* ============================================================================= + * bitmap_clear + * -- Clears ith bit to 0 + * -- Returns TRUE on success, else FALSE + * ============================================================================= + */ + boolean clear (long i) { + if ((i < 0) || (i >= numBit)) { + return FALSE; + } + + bits[i/NUM_BIT_PER_WORD] &= ~(1 << (i % NUM_BIT_PER_WORD)); + + return TRUE; + } + + + /* ============================================================================= + * bitmap_clearAll + * -- Clears all bit to 0 + * ============================================================================= + */ + void clearAll () { + int i = 0; + for(i = 0; i < numWord; i++) { + bits[i] = 0; + } + } + + + /* ============================================================================= + * bitmap_isSet + * -- Returns TRUE if ith bit is set, else FALSE + * ============================================================================= + */ + 
boolean isSet (long i) { + if ((i >= 0) && (i < numBit) && + (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD)))) { + return TRUE; + } + + return FALSE; + } + + + /* ============================================================================= + * bitmap_findClear + * -- Returns index of first clear bit + * -- If start index is negative, will start from beginning + * -- If all bits are set, returns -1 + * ============================================================================= + */ + long findClear (long startIndex) { + long i; + + for (i = MAX(startIndex, 0); i < numBit; i++) { + if (!(bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD)))) { + return i; + } + } + + return -1; + } + + + /* ============================================================================= + * bitmap_findSet + * -- Returns index of first set bit + * -- If all bits are clear, returns -1 + * ============================================================================= + */ + long findSet (long startIndex) { + long i; + + for (i = MAX(startIndex, 0); i < numBit; i++) { + if (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD))) { + return i; + } + } + + return -1; + } + + + /* ============================================================================= + * bitmap_getNumClear + * ============================================================================= + */ + long getNumClear () { + return (numBit - getNumSet()); + } + + + /* ============================================================================= + * bitmap_getNumSet + * ============================================================================= + */ + long getNumSet () { + long i; + long count = 0; + + for (i = 0; i < numBit; i++) { + if (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD))) { + count++; + } + } + + return count; + } + + /* ============================================================================= + * bitmap_copy + * 
============================================================================= + */ + //void copy(bitmap_t* dstPtr, bitmap_t* srcPtr); + // SEE COPY CONSTRUCTOR + + /* ============================================================================= + * bitmap_toggleAll + * ============================================================================= + */ + void toggleAll () { + long w; + for (w = 0; w < numWord; w++) { + bits[w] ^= -1L; + } + } + + long DIVIDE_AND_ROUND_UP(long a, long b) { + return (a/b) + (((a % b) > 0) ? (1) : (0)); + } + + long MAX(long a, long b) { + return (a > b) ? a : b; + } +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Bitmap.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/Bitmap.java~ new file mode 100644 index 00000000..0e437075 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Bitmap.java~ @@ -0,0 +1,212 @@ +public class Bitmap { + public long numBit; + public long numWord; + public long bits[]; + + private static NUM_BIT_PER_BYTE = 8; + private static NUM_BIT_PER_WORD = (8) * NUM_BIT_PER_BYTE) + + + /* ============================================================================= + * bitmap_alloc + * -- Returns NULL on failure + * ============================================================================= + */ + Bitmap(long myNumBit) { + + numBit = myNumBit; + numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD); + + bits = new long[numWord]; + + int i = 0; + for(i = 0; i < numWord; i++) { + bits[i] = 0; + } + } + + Bitmap(Bitmap myBitMap) { + numBit = myBitMap.numBit; + numWord = myBitMap.numWord; + bits = new long[numWord]; + int i = 0; + for(i = 0; i < numWord; i++) { + bits[i] = myBitMap.bits[i]; + } + } + + /* ============================================================================= + * Pbitmap_alloc + * -- Returns NULL on failure + * ============================================================================= + */ + //bitmap_t* Pbitmap_alloc (long numBit) { } + + + /* 
============================================================================= + * bitmap_free + * ============================================================================= + */ + //void bitmap_free (bitmap_t* bitmapPtr); + + + /* ============================================================================= + * Pbitmap_free + * ============================================================================= + */ + //void Pbitmap_free (bitmap_t* bitmapPtr); + + + /* ============================================================================= + * bitmap_set + * -- Sets ith bit to 1 + * -- Returns TRUE on success, else FALSE + * ============================================================================= + */ + boolean set (long i) { + if ((i < 0) || (i >= numBit)) { + return FALSE; + } + + bits[i/NUM_BIT_PER_WORD] |= (1 << (i % NUM_BIT_PER_WORD)); + + return TRUE; + } + + + /* ============================================================================= + * bitmap_clear + * -- Clears ith bit to 0 + * -- Returns TRUE on success, else FALSE + * ============================================================================= + */ + boolean clear (long i) { + if ((i < 0) || (i >= numBit)) { + return FALSE; + } + + bits[i/NUM_BIT_PER_WORD] &= ~(1 << (i % NUM_BIT_PER_WORD)); + + return TRUE; + } + + + /* ============================================================================= + * bitmap_clearAll + * -- Clears all bit to 0 + * ============================================================================= + */ + void clearAll () { + int i = 0; + for(i = 0; i < numWord; i++) { + bits[i] = 0; + } + } + + + /* ============================================================================= + * bitmap_isSet + * -- Returns TRUE if ith bit is set, else FALSE + * ============================================================================= + */ + boolean isSet (long i) { + if ((i >= 0) && (i < numBit) && + (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD)))) { + return TRUE; 
+ } + + return FALSE; + } + + + /* ============================================================================= + * bitmap_findClear + * -- Returns index of first clear bit + * -- If start index is negative, will start from beginning + * -- If all bits are set, returns -1 + * ============================================================================= + */ + long findClear (long startIndex) { + long i; + + for (i = MAX(startIndex, 0); i < numBit; i++) { + if (!(bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD)))) { + return i; + } + } + + return -1; + } + + + /* ============================================================================= + * bitmap_findSet + * -- Returns index of first set bit + * -- If all bits are clear, returns -1 + * ============================================================================= + */ + long findSet (long startIndex) { + long i; + + for (i = MAX(startIndex, 0); i < numBit; i++) { + if (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD))) { + return i; + } + } + + return -1; + } + + + /* ============================================================================= + * bitmap_getNumClear + * ============================================================================= + */ + long getNumClear () { + return (numBit - getNumSet()); + } + + + /* ============================================================================= + * bitmap_getNumSet + * ============================================================================= + */ + long getNumSet () { + long i; + long count = 0; + + for (i = 0; i < numBit; i++) { + if (bits[i/NUM_BIT_PER_WORD] & (1 << (i % NUM_BIT_PER_WORD))) { + count++; + } + } + + return count; + } + + /* ============================================================================= + * bitmap_copy + * ============================================================================= + */ + //void copy(bitmap_t* dstPtr, bitmap_t* srcPtr); + // SEE COPY CONSTRUCTOR + + /* 
============================================================================= + * bitmap_toggleAll + * ============================================================================= + */ + void toggleAll () { + long w; + for (w = 0; w < numWord; w++) { + bits[w] ^= -1L; + } + } + + long DIVIDE_AND_ROUND_UP(long a, long b) { + return (a/b) + (((a % b) > 0) ? (1) : (0)); + } + + long MAX(long a, long b) { + return (a > b) ? a : b; + } +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Gene.java b/Robust/src/Benchmarks/SingleTM/genome/java/Gene.java new file mode 100644 index 00000000..df2f306d --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Gene.java @@ -0,0 +1,31 @@ +public class Gene { + public long length; + public String contents; + public Bitmap startBitmapPtr; /* used for creating segments */ + + Gene(long myLength) { + length = myLength; + contents = ""; + startBitmapPtr = new BitMap(length); + } + + +/* ============================================================================= + * gene_create + * -- Populate contents with random gene + * ============================================================================= + */ + void create (Random randomObj) { + long i; + char nucleotides[] = { + NUCLEOTIDE_ADENINE, + NUCLEOTIDE_CYTOSINE, + NUCLEOTIDE_GUANINE, + NUCLEOTIDE_THYMINE, + }; + + for (i = 0; i < length; i++) { + contents[i] = nucleotides[(random_generate(randomObj)% NUCLEOTIDE_NUM_TYPE)]; + } + } +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Gene.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/Gene.java~ new file mode 100644 index 00000000..c78c495b --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Gene.java~ @@ -0,0 +1,32 @@ +public class Gene { + public long length; + public String contents; + public Bitmap startBitmapPtr; /* used for creating segments */ + + Gene(long myLength) { + length = myLength; + contents = ""; + startBitmapPtr = new BitMap(length); + } + + +/* 
============================================================================= + * gene_create + * -- Populate contents with random gene + * ============================================================================= + */ + void create (Random randomObj) { + long i; + char nucleotides[] = { + NUCLEOTIDE_ADENINE, + NUCLEOTIDE_CYTOSINE, + NUCLEOTIDE_GUANINE, + NUCLEOTIDE_THYMINE, + }; + + for (i = 0; i < length; i++) { + contents[i] = + nucleotides[(random_generate(randomObj)% NUCLEOTIDE_NUM_TYPE)]; + } + } +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Genome.java b/Robust/src/Benchmarks/SingleTM/genome/java/Genome.java new file mode 100644 index 00000000..610f8838 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Genome.java @@ -0,0 +1,147 @@ +/* +"gene.h" +"random.h" +"segments.h" +"sequencer.h" +"thread.h" +"timer.h" +"tm.h" +"vector.h" +"bitmap.h" + +*/ + +public class Genome { + long geneLength; + long segmentLength; + long minNumSegment; + long numThread; + + Genome(String x[]) { + parseCmdLine(x); + } + + public static void main(String x[]){ + +/* TIMER_T start; */ +/* TIMER_T stop; */ + +/* GOTO_REAL(); */ + + /* Initialization */ +/* parseArgs(argc, (char** const)argv); */ +/* SIM_GET_NUM_CPU(global_params[PARAM_THREAD]); */ + + System.out.print("Creating gene and segments... 
"); + Genome g = new Genome(x); + + +/* TM_STARTUP(numThread); */ +/* P_MEMORY_STARTUP(numThread); */ +/* thread_startup(numThread); */ + + Random randomPtr = new Random(); + random_alloc(randomPtr); + random_seed(randomPtr, 0); + + Gene genePtr = new Gene(geneLength); + genePtr.create(randomPtr); + String gene = genePtr.contents; + + Segments segmentsPtr = new Segments(segmentLength, minNumSegment); + segmentsPtr.create(genePtr, randomPtr); + sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr); + assert(sequencerPtr != NULL); + + puts("done."); + printf("Gene length = %li\n", genePtr->length); + printf("Segment length = %li\n", segmentsPtr->length); + printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr)); + fflush(stdout); + + /* Benchmark */ + printf("Sequencing gene... "); + fflush(stdout); + TIMER_READ(start); + GOTO_SIM(); +#ifdef OTM +#pragma omp parallel + { + sequencer_run(sequencerPtr); + } +#else + thread_start(sequencer_run, (void*)sequencerPtr); +#endif + GOTO_REAL(); + TIMER_READ(stop); + puts("done."); + printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop)); + fflush(stdout); + + /* Check result */ + { + char* sequence = sequencerPtr->sequence; + int result = strcmp(gene, sequence); + printf("Sequence matches gene: %s\n", (result ? "no" : "yes")); + if (result) { + printf("gene = %s\n", gene); + printf("sequence = %s\n", sequence); + } + fflush(stdout); + assert(strlen(sequence) >= strlen(gene)); + } + + /* Clean up */ + printf("Deallocating memory... 
"); + fflush(stdout); + sequencer_free(sequencerPtr); + segments_free(segmentsPtr); + gene_free(genePtr); + random_free(randomPtr); + puts("done."); + fflush(stdout); + + TM_SHUTDOWN(); + P_MEMORY_SHUTDOWN(); + + GOTO_SIM(); + + thread_shutdown(); + + MAIN_RETURN(0); + } + + public static void parseCmdLine(String args[]) { + + int i = 0; + String arg; + while (i < args.length && args[i].startsWith("-")) { + arg = args[i++]; + //check options + if(arg.equals("-g")) { + if(i < args.length) { + geneLength = new Integer(args[i++]).intValue(); + } + } else if(arg.equals("-s")) { + if(i < args.length) { + segmentLength = new Integer(args[i++]).intValue(); + } + } else if(arg.equals("-n")) { + if(i < args.length) { + minNumSegment = new Integer(args[i++]).intValue(); + } + } else if(arg.equals("-t")) { + if(i < args.length) { + numThread = new Integer(args[i++]).intValue(); + } + } + } + } +} + +public enum param_types { + PARAM_GENE /*= (unsigned char)'g'*/, + PARAM_NUMBER /*= (unsigned char)'n'*/, + PARAM_SEGMENT /*= (unsigned char)'s'*/, + PARAM_THREAD /*= (unsigned char)'t',*/ +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Genome.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/Genome.java~ new file mode 100644 index 00000000..40e407e3 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Genome.java~ @@ -0,0 +1,148 @@ +/* +"gene.h" +"random.h" +"segments.h" +"sequencer.h" +"thread.h" +"timer.h" +"tm.h" +"vector.h" +"bitmap.h" + +*/ + +public class Genome { + long geneLength; + long segmentLength; + long minNumSegment; + long numThread; + + Genome(String x[]) { + parseCmdLine(x); + } + + public static void main(String x[]){ + +/* TIMER_T start; */ +/* TIMER_T stop; */ + +/* GOTO_REAL(); */ + + /* Initialization */ +/* parseArgs(argc, (char** const)argv); */ +/* SIM_GET_NUM_CPU(global_params[PARAM_THREAD]); */ + + System.out.print("Creating gene and segments... 
"); + Genome g = new Genome(x); + + +/* TM_STARTUP(numThread); */ +/* P_MEMORY_STARTUP(numThread); */ +/* thread_startup(numThread); */ + + Random randomPtr = new Random(); + random_alloc(randomPtr); + random_seed(randomPtr, 0); + + Gene genePtr = new Gene(geneLength); + genePtr.create(randomPtr); + String gene = genePtr.contents; + + Segments segmentsPtr = new Segments(segmentLength, minNumSegment); + assert(segmentsPtr != NULL); + segments_create(segmentsPtr, genePtr, randomPtr); + sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr); + assert(sequencerPtr != NULL); + + puts("done."); + printf("Gene length = %li\n", genePtr->length); + printf("Segment length = %li\n", segmentsPtr->length); + printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr)); + fflush(stdout); + + /* Benchmark */ + printf("Sequencing gene... "); + fflush(stdout); + TIMER_READ(start); + GOTO_SIM(); +#ifdef OTM +#pragma omp parallel + { + sequencer_run(sequencerPtr); + } +#else + thread_start(sequencer_run, (void*)sequencerPtr); +#endif + GOTO_REAL(); + TIMER_READ(stop); + puts("done."); + printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop)); + fflush(stdout); + + /* Check result */ + { + char* sequence = sequencerPtr->sequence; + int result = strcmp(gene, sequence); + printf("Sequence matches gene: %s\n", (result ? "no" : "yes")); + if (result) { + printf("gene = %s\n", gene); + printf("sequence = %s\n", sequence); + } + fflush(stdout); + assert(strlen(sequence) >= strlen(gene)); + } + + /* Clean up */ + printf("Deallocating memory... 
"); + fflush(stdout); + sequencer_free(sequencerPtr); + segments_free(segmentsPtr); + gene_free(genePtr); + random_free(randomPtr); + puts("done."); + fflush(stdout); + + TM_SHUTDOWN(); + P_MEMORY_SHUTDOWN(); + + GOTO_SIM(); + + thread_shutdown(); + + MAIN_RETURN(0); + } + + public static void parseCmdLine(String args[]) { + + int i = 0; + String arg; + while (i < args.length && args[i].startsWith("-")) { + arg = args[i++]; + //check options + if(arg.equals("-g")) { + if(i < args.length) { + geneLength = new Integer(args[i++]).intValue(); + } + } else if(arg.equals("-s")) { + if(i < args.length) { + segmentLength = new Integer(args[i++]).intValue(); + } + } else if(arg.equals("-n")) { + if(i < args.length) { + minNumSegment = new Integer(args[i++]).intValue(); + } + } else if(arg.equals("-t")) { + if(i < args.length) { + numThread = new Integer(args[i++]).intValue(); + } + } + } + } +} + +public enum param_types { + PARAM_GENE /*= (unsigned char)'g'*/, + PARAM_NUMBER /*= (unsigned char)'n'*/, + PARAM_SEGMENT /*= (unsigned char)'s'*/, + PARAM_THREAD /*= (unsigned char)'t',*/ +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/List.java b/Robust/src/Benchmarks/SingleTM/genome/java/List.java new file mode 100644 index 00000000..310e51b8 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/List.java @@ -0,0 +1,256 @@ +public class List { + +private class list_node { + void* dataPtr; + struct list_node* nextPtr; +} + +typedef list_node_t* list_iter_t; + + list_node_t head; + long (*compare)(const void*, const void*); /* returns {-1,0,1}, 0 -> equal */ + long size; +} list_t; + + +/* ============================================================================= + * list_iter_reset + * ============================================================================= + */ +void +list_iter_reset (list_iter_t* itPtr, list_t* listPtr); + + +/* ============================================================================= + * TMlist_iter_reset + * 
============================================================================= + */ +TM_CALLABLE +void +TMlist_iter_reset (TM_ARGDECL list_iter_t* itPtr, list_t* listPtr); + + +/* ============================================================================= + * list_iter_hasNext + * ============================================================================= + */ +bool_t +list_iter_hasNext (list_iter_t* itPtr, list_t* listPtr); + + +/* ============================================================================= + * TMlist_iter_hasNext + * ============================================================================= + */ +TM_CALLABLE +bool_t +TMlist_iter_hasNext (TM_ARGDECL list_iter_t* itPtr, list_t* listPtr); + + +/* ============================================================================= + * list_iter_next + * ============================================================================= + */ +void* +list_iter_next (list_iter_t* itPtr, list_t* listPtr); + + +/* ============================================================================= + * TMlist_iter_next + * ============================================================================= + */ +TM_CALLABLE +void* +TMlist_iter_next (TM_ARGDECL list_iter_t* itPtr, list_t* listPtr); + + +/* ============================================================================= + * list_alloc + * -- If NULL passed for 'compare' function, will compare data pointer addresses + * -- Returns NULL on failure + * ============================================================================= + */ +list_t* +list_alloc (long (*compare)(const void*, const void*)); + + +/* ============================================================================= + * Plist_alloc + * -- If NULL passed for 'compare' function, will compare data pointer addresses + * -- Returns NULL on failure + * ============================================================================= + */ +list_t* +Plist_alloc (long (*compare)(const void*, const void*)); + + 
+/* ============================================================================= + * TMlist_alloc + * -- If NULL passed for 'compare' function, will compare data pointer addresses + * -- Returns NULL on failure + * ============================================================================= + */ +list_t* +TMlist_alloc (TM_ARGDECL long (*compare)(const void*, const void*)); + + +/* ============================================================================= + * list_free + * ============================================================================= + */ +void +list_free (list_t* listPtr); + + +/* ============================================================================= + * Plist_free + * -- If NULL passed for 'compare' function, will compare data pointer addresses + * -- Returns NULL on failure + * ============================================================================= + */ +void +Plist_free (list_t* listPtr); + + +/* ============================================================================= + * TMlist_free + * -- If NULL passed for 'compare' function, will compare data pointer addresses + * -- Returns NULL on failure + * ============================================================================= + */ +void +TMlist_free (TM_ARGDECL list_t* listPtr); + + + +/* ============================================================================= + * list_isEmpty + * -- Return TRUE if list is empty, else FALSE + * ============================================================================= + */ +bool_t +list_isEmpty (list_t* listPtr); + + +/* ============================================================================= + * TMlist_isEmpty + * -- Return TRUE if list is empty, else FALSE + * ============================================================================= + */ +TM_CALLABLE +bool_t +TMlist_isEmpty (TM_ARGDECL list_t* listPtr); + + +/* ============================================================================= + * list_getSize + * -- Returns 
size of list + * ============================================================================= + */ +long +list_getSize (list_t* listPtr); + + +/* ============================================================================= + * TMlist_getSize + * -- Returns size of list + * ============================================================================= + */ +TM_CALLABLE +long +TMlist_getSize (TM_ARGDECL list_t* listPtr); + + +/* ============================================================================= + * list_find + * -- Returns NULL if not found, else returns pointer to data + * ============================================================================= + */ +void* +list_find (list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * TMlist_find + * -- Returns NULL if not found, else returns pointer to data + * ============================================================================= + */ +TM_CALLABLE +void* +TMlist_find (TM_ARGDECL list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * list_insert + * -- Return TRUE on success, else FALSE + * ============================================================================= + */ +bool_t +list_insert (list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * Plist_insert + * -- Return TRUE on success, else FALSE + * ============================================================================= + */ +bool_t +Plist_insert (list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * TMlist_insert + * -- Return TRUE on success, else FALSE + * ============================================================================= + */ +TM_CALLABLE +bool_t +TMlist_insert (TM_ARGDECL list_t* listPtr, void* dataPtr); + + +/* 
============================================================================= + * list_remove + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ +bool_t +list_remove (list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * Plist_remove + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ +bool_t +Plist_remove (list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * TMlist_remove + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ +TM_CALLABLE +bool_t +TMlist_remove (TM_ARGDECL list_t* listPtr, void* dataPtr); + + +/* ============================================================================= + * list_clear + * -- Removes all elements + * ============================================================================= + */ +void +list_clear (list_t* listPtr); + + +/* ============================================================================= + * Plist_clear + * -- Removes all elements + * ============================================================================= + */ +void +Plist_clear (list_t* listPtr); + +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/List.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/List.java~ new file mode 100644 index 00000000..0b409c66 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/List.java~ @@ -0,0 +1,4 @@ +public class List { + + +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Random.java b/Robust/src/Benchmarks/SingleTM/genome/java/Random.java new file mode 100644 index 00000000..819abe26 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Random.java @@ -0,0 +1,96 @@ +public class Random { + long[] mt; + int 
mti; + long RANDOM_DEFAULT_SEED; + /* period parameter */ + int N; + int M; + long MATRIX_A; + long UPPER_MASK; + long LOWER_MASK; + + public Random() { + RANDOM_DEFAULT_SEED = 0L; + N = 624; + M = 397; + mt = new long[N]; + mti = 0; + MATRIX_A = 0x9908b0dfL; /* constant vector a */ + UPPER_MASK = 0x80000000L; /* most significant w-r bits */ + LOWER_MASK = 0x7fffffffL; /* least significant r bits */ + } + + public Random random_alloc(Random rand) { + init_genrand(rand, rand.RANDOM_DEFAULT_SEED); + return rand; + } + + /* initializes mt[N] with a seed */ + public void init_genrand(Random rand, long s) { + int mti; + + rand.mt[0]= s & 0xFFFFFFFFL; + for (mti=1; mti> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + rand.mt[mti] &= 0xFFFFFFFFL; + /* for >32 bit machines */ + } + + rand.mti = mti; + } + + public void random_seed(Random rand, long seed) { + init_genrand(rand, seed); + } + + public long random_generate(Random rand) { + return genrand_int32(rand); + } + + //public static long genrand_int32(long[] mt, long mtiPtr) { + public long genrand_int32(Random rand) { + long y; + long[] mag01= new long[2]; + mag01[0] = 0x0L; + mag01[1] = rand.MATRIX_A; + int mti = rand.mti; + + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= rand.N) { /* generate N words at one time */ + int kk; + + if (mti == rand.N+1) /* if init_genrand() has not been called, */ + init_genrand(rand, 5489L); /* a default initial seed is used */ + + for (kk=0;kk> 1) ^ mag01[(int)(y & 0x1L)]; + } + for (;kk> 1) ^ mag01[(int)(y & 0x1L)]; + } + y = (rand.mt[N-1]&rand.UPPER_MASK)|(rand.mt[0]&LOWER_MASK); + rand.mt[N-1] = rand.mt[M-1] ^ (y >> 1) ^ mag01[(int)(y & 0x1L)]; + + mti = 0; + } + + y = rand.mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680L; + y ^= (y << 15) & 0xefc60000L; + y ^= (y >> 18); + + 
rand.mti = mti; + + return y; + } +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Segments.java b/Robust/src/Benchmarks/SingleTM/genome/java/Segments.java new file mode 100644 index 00000000..d685eef4 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Segments.java @@ -0,0 +1,70 @@ +public class Segments { + public long length; + public long minNum; + Vector contentsPtr; +/* private: */ + String strings[]; + + Segments (long myLength, long myMinNum) { + minNum = myMinNum; + length = myLength; + + contentsPtr = new Vector(minNum); + + } + + + /* ============================================================================= + * segments_create + * -- Populates 'contentsPtr' (NOTE(review): port in progress; the body names segmentLength, minNumSegment and segmentsContentsPtr, which this class as shown does not declare -- presumably length, minNum and contentsPtr; confirm) + * ============================================================================= + */ + void create (Gene genePtr, Random randomPtr) { + String geneString; + long geneLength; + Bitmap startBitmapPtr; + long numStart; + long i; + long maxZeroRunLength; + + geneString = genePtr.contents; + geneLength = genePtr.length; + startBitmapPtr = genePtr.startBitmapPtr; + numStart = geneLength - segmentLength + 1; + + /* Pick some random segments to start */ + for (i = 0; i < minNumSegment; i++) { + long j = (long)(random_generate(randomPtr) % numStart); + boolean status = startBitmapPtr.set(j); + strings[i] = geneString[j]; + segmentsContentsPtr.add(strings[i]); + } + + /* Make sure segment covers start */ + i = 0; + if (!startBitmapPtr.isSet(i)) { + String string; + string = geneString[i]; + segmentsContentsPtr.add(string); + startBitmapPtr.set(i); + } + + /* Add extra segments to fill holes and ensure overlap */ + maxZeroRunLength = length - 1; + for (i = 0; i < numStart; i++) { + long i_stop = MIN((i+maxZeroRunLength), numStart); + for ( /* continue */; i < i_stop; i++) { + if (startBitmapPtr.isSet(i)) { + break; + } + } + if (i == i_stop) { + /* Found big enough hole */ + i = i - 1; + String string = geneString[i]; + segmentsContentsPtr.add(string); + startBitmapPtr.set(i); + } + } 
+ } +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Segments.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/Segments.java~ new file mode 100644 index 00000000..accbdcb3 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Segments.java~ @@ -0,0 +1,71 @@ +public class Segments { + public long length; + public long minNum; + Vector contentsPtr; +/* private: */ + String strings[]; + + Segments (long myLength, long myMinNum) { + minNum = myMinNum; + length = myLength; + + contentsPtr = new Vector(minNum); + + } + + + /* ============================================================================= + * segments_create + * -- Populates 'contentsPtr' + * ============================================================================= + */ + void create (Gene genePtr, Random randomPtr) { + String geneString; + long geneLength; + Bitmap startBitmapPtr; + long numStart; + long i; + long maxZeroRunLength; + + geneString = genePtr.contents; + geneLength = genePtr.length; + startBitmapPtr = genePtr.startBitmapPtr; + numStart = geneLength - segmentLength + 1; + + /* Pick some random segments to start */ + for (i = 0; i < minNumSegment; i++) { + long j = (long)(random_generate(randomPtr) % numStart); + boolean status = startBitmapPtr.set(j); + strings[i] = geneString[j]; + segmentsContentsPtr.add(strings[i]); + } + + /* Make sure segment covers start */ + i = 0; + if (!startBitmapPtr.isSet(i)) { + String string; + string = geneString[i]; + segmentsContentsPtr.add(string); + startBitmapPtr.set(i); + } + + /* Add extra segments to fill holes and ensure overlap */ + maxZeroRunLength = length - 1; + for (i = 0; i < numStart; i++) { + long i_stop = MIN((i+maxZeroRunLength), numStart); + for ( /* continue */; i < i_stop; i++) { + if (startBitmapPtr.isSet(i)) { + break; + } + } + if (i == i_stop) { + /* Found big enough hole */ + i = i - 1; + String string = geneString[i]; + segmentsContentsPtr.add(string); + startBitmapPtr.set(i); + assert(status); + } + } + } +} 
diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Sequencer.java b/Robust/src/Benchmarks/SingleTM/genome/java/Sequencer.java new file mode 100644 index 00000000..a8c81b9f --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Sequencer.java @@ -0,0 +1,510 @@ +public class Sequencer { + + public char* sequence; + + public Segments segmentsPtr; + + /* For removing duplicate segments */ + Hashmap uniqueSegmentsPtr; + + /* For matching segments */ + endInfoEntry endInfoEntries[]; + Table startHashToConstructEntryTables[]; + + /* For constructing sequence */ + constructEntry constructEntries[]; + Table hashToConstructEntryTable; + + /* For deallocation */ + long segmentLength; + + + /* ============================================================================= + * sequencer_alloc (ported as the Sequencer constructor) + * -- Sets up the duplicate-removal hashmap, end-info entries, start-hash tables and construct entries; the C original's "Returns NULL on failure" does not apply to a constructor + * ============================================================================= + */ + Sequencer (long myGeneLength, long mySegmentLength, Segments mySegmentsPtr) { + + long maxNumUniqueSegment = myGeneLength - mySegmentLength + 1; + long i; + + uniqueSegmentsPtr = new Hashmap(myGeneLength); + + /* For finding a matching entry (NOTE(review): the loop below writes fields of each slot without a visible per-element allocation -- confirm the elements are instantiated) */ + endInfoEntries = new endInfoEntry[maxNumUniqueSegment]; + for (i = 0; i < maxNumUniqueSegment; i++) { + endInfoEntries[i].isEnd = TRUE; + endInfoEntries[i].jumpToNext = 1; + } + startHashToConstructEntryTables = new Table[mySegmentLength]; + for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */ + startHashToConstructEntryTables[i] = new Table(myGeneLength); + } + segmentLength = mySegmentLength; + + /* For constructing sequence (NOTE(review): 'ContructEntry' does not match the 'constructEntry' class declared later in this file -- likely a typo; confirm) */ + constructEntries = new ContructEntry[maxNumUniqueSegment]; + + for (i= 0; i < maxNumUniqueSegment; i++) { + constructEntries[i].isStart = TRUE; + constructEntries[i].segment = NULL; + constructEntries[i].endHash = 0; + constructEntries[i].startPtr = constructEntries[i]; + constructEntries[i].nextPtr = NULL; + constructEntries[i].endPtr = constructEntries[i]; + 
constructEntries[i].overlap = 0; + constructEntries[i].length = segmentLength; + } + hashToConstructEntryTable = new Table(geneLength); + + segmentsPtr = mySegmentsPtr; + } + + + /* ============================================================================= + * sequencer_run + * ============================================================================= + */ + + void run () { + + //TM_THREAD_ENTER(); + + long threadId = thread_getId(); + + //Sequencer sequencerPtr = (sequencer_t*)argPtr; + + Hashmap uniqueSegmentsPtr; + endInfoEntry endInfoEntries[]; + Hashmap startHashToConstructEntryTables[]; + constructEntry constructEntries[]; + Hashmap hashToConstructEntryTable; + + Vector segmentsContentsPtr = segmentsPtr.contentsPtr; + long numSegment = segmentsContentsPtr.size(); + long segmentLength = segmentsPtr.length; + + long i; + long j; + long i_start; + long i_stop; + long numUniqueSegment; + long substringLength; + long entryIndex; + + /* + * Step 1: Remove duplicate segments + */ +#if defined(HTM) || defined(STM) + long numThread = thread_getNumThread(); + { + /* Choose disjoint segments [i_start,i_stop) for each thread */ + long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */ + i_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + i_stop = numSegment; + } else { + i_stop = i_start + partitionSize; + } + } +#else /* !(HTM || STM) */ + i_start = 0; + i_stop = numSegment; +#endif /* !(HTM || STM) */ + for (i = i_start; i < i_stop; i+=CHUNK_STEP1) { + TM_BEGIN(); + { + long ii; + long ii_stop = MIN(i_stop, (i+CHUNK_STEP1)); + for (ii = i; ii < ii_stop; ii++) { + string segment = segmentsContentsPtr.get(ii); + TMHASHTABLE_INSERT(uniqueSegmentsPtr, + segment, + segment); + } /* ii */ + } + TM_END(); + } + + thread_barrier_wait(); + + /* + * Step 2a: Iterate over unique segments and compute hashes. 
+ * + * For the gene "atcg", the hashes for the end would be: + * + * "t", "tc", and "tcg" + * + * And for the gene "tcgg", the hashes for the start would be: + * + * "t", "tc", and "tcg" + * + * The names are "end" and "start" because if a matching pair is found, + * they are the substring of the end part of the pair and the start + * part of the pair respectively. In the above example, "tcg" is the + * matching substring so: + * + * (end) (start) + * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg") + */ + + /* uniqueSegmentsPtr is constant now */ + numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr); + entryIndex = 0; + +#if defined(HTM) || defined(STM) + { + /* Choose disjoint segments [i_start,i_stop) for each thread */ + long num = uniqueSegmentsPtr->numBucket; + long partitionSize = (num + numThread/2) / numThread; /* with rounding */ + i_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + i_stop = num; + } else { + i_stop = i_start + partitionSize; + } + } + { + /* Approximate disjoint segments of element allocation in constructEntries */ + long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ + entryIndex = threadId * partitionSize; + } +#else /* !(HTM || STM) */ + i_start = 0; + i_stop = uniqueSegmentsPtr->numBucket; + entryIndex = 0; +#endif /* !(HTM || STM) */ + + for (i = i_start; i < i_stop; i++) { + + list_t* chainPtr = uniqueSegmentsPtr->buckets[i]; + list_iter_t it; + list_iter_reset(&it, chainPtr); + + while (list_iter_hasNext(&it, chainPtr)) { + + char* segment = + (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr; + constructEntry_t* constructEntryPtr; + long j; + ulong_t startHash; + bool_t status; + + /* Find an empty constructEntries entry */ + TM_BEGIN(); + while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) { + entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */ + } + constructEntryPtr = &constructEntries[entryIndex]; + 
TM_SHARED_WRITE_P(constructEntryPtr->segment, segment); + TM_END(); + entryIndex = (entryIndex + 1) % numUniqueSegment; + + /* + * Save hashes (sdbm algorithm) of segment substrings + * + * endHashes will be computed for shorter substrings after matches + * have been made (in the next phase of the code). This will reduce + * the number of substrings for which hashes need to be computed. + * + * Since we can compute startHashes incrementally, we go ahead + * and compute all of them here. + */ + /* constructEntryPtr is local now */ + constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]); + + startHash = 0; + for (j = 1; j < segmentLength; j++) { + startHash = (ulong_t)segment[j-1] + + (startHash << 6) + (startHash << 16) - startHash; + TM_BEGIN(); + status = TMTABLE_INSERT(startHashToConstructEntryTables[j], + (ulong_t)startHash, + (void*)constructEntryPtr ); + TM_END(); + assert(status); + } + + /* + * For looking up construct entries quickly + */ + startHash = (ulong_t)segment[j-1] + + (startHash << 6) + (startHash << 16) - startHash; + TM_BEGIN(); + status = TMTABLE_INSERT(hashToConstructEntryTable, + (ulong_t)startHash, + (void*)constructEntryPtr); + TM_END(); + assert(status); + } + } + + thread_barrier_wait(); + + /* + * Step 2b: Match ends to starts by using hash-based string comparison. 
+ */ + for (substringLength = segmentLength-1; substringLength > 0; substringLength--) { + + table_t* startHashToConstructEntryTablePtr = + startHashToConstructEntryTables[substringLength]; + list_t** buckets = startHashToConstructEntryTablePtr->buckets; + long numBucket = startHashToConstructEntryTablePtr->numBucket; + + long index_start; + long index_stop; + +#if defined(HTM) || defined(STM) + { + /* Choose disjoint segments [index_start,index_stop) for each thread */ + long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ + index_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + index_stop = numUniqueSegment; + } else { + index_stop = index_start + partitionSize; + } + } +#else /* !(HTM || STM) */ + index_start = 0; + index_stop = numUniqueSegment; +#endif /* !(HTM || STM) */ + + /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */ + for (entryIndex = index_start; + entryIndex < index_stop; + entryIndex += endInfoEntries[entryIndex].jumpToNext) + { + if (!endInfoEntries[entryIndex].isEnd) { + continue; + } + + /* ConstructEntries[entryIndex] is local data */ + constructEntry_t* endConstructEntryPtr = + &constructEntries[entryIndex]; + char* endSegment = endConstructEntryPtr->segment; + ulong_t endHash = endConstructEntryPtr->endHash; + + list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */ + list_iter_t it; + list_iter_reset(&it, chainPtr); + + /* Linked list at chainPtr is constant */ + while (list_iter_hasNext(&it, chainPtr)) { + + constructEntry_t* startConstructEntryPtr = + (constructEntry_t*)list_iter_next(&it, chainPtr); + char* startSegment = startConstructEntryPtr->segment; + long newLength = 0; + + /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */ + TM_BEGIN(); + + /* Check if matches */ + if (TM_SHARED_READ(startConstructEntryPtr->isStart) && + (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) 
&& + (strncmp(startSegment, + &endSegment[segmentLength - substringLength], + substringLength) == 0)) + { + TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE); + + constructEntry_t* startConstructEntry_endPtr; + constructEntry_t* endConstructEntry_startPtr; + + /* Update endInfo (appended something so no longer end) */ + TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE); + + /* Update segment chain construct info */ + startConstructEntry_endPtr = + (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr); + endConstructEntry_startPtr = + (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr); + + assert(startConstructEntry_endPtr); + assert(endConstructEntry_startPtr); + TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr, + endConstructEntry_startPtr); + TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr, + startConstructEntryPtr); + TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr, + startConstructEntry_endPtr); + TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength); + newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) + + (long)TM_SHARED_READ(startConstructEntryPtr->length) - + substringLength; + TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength); + } /* if (matched) */ + + TM_END(); + + if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */ + break; + } + } /* iterate over chain */ + + } /* for (endIndex < numUniqueSegment) */ + + thread_barrier_wait(); + + /* + * Step 2c: Update jump values and hashes + * + * endHash entries of all remaining ends are updated to the next + * substringLength. Additionally jumpToNext entries are updated such + * that they allow to skip non-end entries. Currently this is sequential + * because parallelization did not perform better. +. 
*/ + + if (threadId == 0) { + if (substringLength > 1) { + long index = segmentLength - substringLength + 1; + /* initialization of j and i: with i being the next end after j=0 */ + for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) { + /* skip ahead to the first entry that is still an end */ + } + /* entry 0 is handled separately from the loop below */ + endInfoEntries[0].jumpToNext = i; + if (endInfoEntries[0].isEnd) { + constructEntry_t* constructEntryPtr = &constructEntries[0]; + char* segment = constructEntryPtr->segment; + constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); + } + /* Continue scanning (do not reset i) */ + for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) { + if (endInfoEntries[i].isEnd) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + char* segment = constructEntryPtr->segment; + constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); + endInfoEntries[j].jumpToNext = MAX(1, (i - j)); + j = i; + } + } + endInfoEntries[j].jumpToNext = i - j; + } + } + + thread_barrier_wait(); + + } /* for (substringLength > 0) */ + + + thread_barrier_wait(); + + /* + * Step 3: Build sequence string + */ + if (threadId == 0) { + + long totalLength = 0; + + for (i = 0; i < numUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + if (constructEntryPtr->isStart) { + totalLength += constructEntryPtr->length; + } + } + + sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char)); + char* sequence = sequencerPtr->sequence; + assert(sequence); + + char* copyPtr = sequence; + long sequenceLength = 0; + + for (i = 0; i < numUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + /* If there are several start segments, we append in arbitrary order */ + if (constructEntryPtr->isStart) { + long newSequenceLength = sequenceLength + constructEntryPtr->length; + assert( newSequenceLength <= totalLength ); + copyPtr = sequence + sequenceLength; + 
sequenceLength = newSequenceLength; + do { + long numChar = segmentLength - constructEntryPtr->overlap; + if ((copyPtr + numChar) > (sequence + newSequenceLength)) { + TM_PRINT0("ERROR: sequence length != actual length\n"); + break; + } + memcpy(copyPtr, + constructEntryPtr->segment, + (numChar * sizeof(char))); + copyPtr += numChar; + } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL); + assert(copyPtr <= (sequence + sequenceLength)); + } + } + + assert(sequence != NULL); + sequence[sequenceLength] = '\0'; + } + + TM_THREAD_EXIT(); + + } + + + private class endInfoEntry { + boolean isEnd; + long jumpToNext; + } + + private class constructEntry { + boolean isStart; + String segment; + long endHash; + constructEntry startPtr; + constructEntry nextPtr; + constructEntry endPtr; + long overlap; + long length; + } + + private class sequencer_run_arg { + Sequencer sequencerPtr; + Segments segmentsPtr; + long preAllocLength; + String returnSequence; /* variable stores return value */ + } + /* ============================================================================= + * hashString + * -- uses sdbm hash function: hash = c + (hash << 6) + (hash << 16) - hash for each character c (NOTE(review): the loop still advances a C-style pointer via str++ on a String parameter; index-based port pending) + * ============================================================================= + */ + static long hashString (String str) + { + long hash = 0; + long c; + + /* Note: Do not change this hashing scheme */ + while ((c = str++) != '\0') { + hash = c + (hash << 6) + (hash << 16) - hash; + } + + return (long)hash; + } + + + /* ============================================================================= + * hashSegment + * -- For hashtable + * ============================================================================= + */ + static long hashSegment (string keyPtr) + { + return (long)hash_sdbm(keyPtr); /* can be any "good" hash function */ + } + + + /* ============================================================================= + * compareSegment + * -- For hashtable + * ============================================================================= 
+ */ + static long compareSegment (pair_t* a, pair_t* b) + { + return strcmp((char*)(a->firstPtr), (char*)(b->firstPtr)); + } + +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Sequencer.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/Sequencer.java~ new file mode 100644 index 00000000..b5895aac --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Sequencer.java~ @@ -0,0 +1,510 @@ +public class Sequencer { + + public char* sequence; + + public Segments segmentsPtr; + + /* For removing duplicate segments */ + Hashmap uniqueSegmentsPtr; + + /* For matching segments */ + endInfoEntry endInfoEntries[]; + Table startHashToConstructEntryTables[]; + + /* For constructing sequence */ + constructEntry constructEntries[]; + Table hashToConstructEntryTable; + + /* For deallocation */ + long segmentLength; + + + /* ============================================================================= + * sequencer_alloc + * -- Returns NULL on failure + * ============================================================================= + */ + Sequencer (long myGeneLength, long mySegmentLength, Segments mySegmentsPtr) { + + long maxNumUniqueSegment = myGeneLength - mySegmentLength + 1; + long i; + + uniqueSegmentsPtr = new Hashmap(myGeneLength); + + /* For finding a matching entry */ + endInfoEntries = new endInfoEntry[maxNumUniqueSegment]; + for (i = 0; i < maxNumUniqueSegment; i++) { + endInfoEntries[i].isEnd = TRUE; + endInfoEntries[i].jumpToNext = 1; + } + startHashToConstructEntryTables = new Table[mySegmentLength]; + for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */ + startHashToConstructEntryTables[i] = new Table(myGeneLength); + } + segmentLength = mySegmentLength; + + /* For constructing sequence */ + constructEntries = new ContructEntry[maxNumUniqueSegment]; + + for (i= 0; i < maxNumUniqueSegment; i++) { + constructEntries[i].isStart = TRUE; + constructEntries[i].segment = NULL; + constructEntries[i].endHash = 0; + constructEntries[i].startPtr = 
constructEntries[i]; + constructEntries[i].nextPtr = NULL; + constructEntries[i].endPtr = constructEntries[i]; + constructEntries[i].overlap = 0; + constructEntries[i].length = segmentLength; + } + hashToConstructEntryTable = new Table(geneLength); + + segmentsPtr = mySegmentsPtr; + } + + + /* ============================================================================= + * sequencer_run + * ============================================================================= + */ + + void run () { + + //TM_THREAD_ENTER(); + + long threadId = thread_getId(); + + //Sequencer sequencerPtr = (sequencer_t*)argPtr; + + Hashmap uniqueSegmentsPtr; + endInfoEntry endInfoEntries[]; + Hashmap startHashToConstructEntryTables[]; + constructEntry constructEntries[]; + Hashmap hashToConstructEntryTable; + + Vector segmentsContentsPtr = segmentsPtr.contentsPtr; + long numSegment = segmentsContentsPtr.size(); + long segmentLength = segmentsPtr.length; + + long i; + long j; + long i_start; + long i_stop; + long numUniqueSegment; + long substringLength; + long entryIndex; + + /* + * Step 1: Remove duplicate segments + */ +#if defined(HTM) || defined(STM) + long numThread = thread_getNumThread(); + { + /* Choose disjoint segments [i_start,i_stop) for each thread */ + long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */ + i_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + i_stop = numSegment; + } else { + i_stop = i_start + partitionSize; + } + } +#else /* !(HTM || STM) */ + i_start = 0; + i_stop = numSegment; +#endif /* !(HTM || STM) */ + for (i = i_start; i < i_stop; i+=CHUNK_STEP1) { + TM_BEGIN(); + { + long ii; + long ii_stop = MIN(i_stop, (i+CHUNK_STEP1)); + for (ii = i; ii < ii_stop; ii++) { + void* segment = vector_at(segmentsContentsPtr, ii); + TMHASHTABLE_INSERT(uniqueSegmentsPtr, + segment, + segment); + } /* ii */ + } + TM_END(); + } + + thread_barrier_wait(); + + /* + * Step 2a: Iterate over unique segments and compute hashes. 
+ * + * For the gene "atcg", the hashes for the end would be: + * + * "t", "tc", and "tcg" + * + * And for the gene "tcgg", the hashes for the start would be: + * + * "t", "tc", and "tcg" + * + * The names are "end" and "start" because if a matching pair is found, + * they are the substring of the end part of the pair and the start + * part of the pair respectively. In the above example, "tcg" is the + * matching substring so: + * + * (end) (start) + * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg") + */ + + /* uniqueSegmentsPtr is constant now */ + numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr); + entryIndex = 0; + +#if defined(HTM) || defined(STM) + { + /* Choose disjoint segments [i_start,i_stop) for each thread */ + long num = uniqueSegmentsPtr->numBucket; + long partitionSize = (num + numThread/2) / numThread; /* with rounding */ + i_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + i_stop = num; + } else { + i_stop = i_start + partitionSize; + } + } + { + /* Approximate disjoint segments of element allocation in constructEntries */ + long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ + entryIndex = threadId * partitionSize; + } +#else /* !(HTM || STM) */ + i_start = 0; + i_stop = uniqueSegmentsPtr->numBucket; + entryIndex = 0; +#endif /* !(HTM || STM) */ + + for (i = i_start; i < i_stop; i++) { + + list_t* chainPtr = uniqueSegmentsPtr->buckets[i]; + list_iter_t it; + list_iter_reset(&it, chainPtr); + + while (list_iter_hasNext(&it, chainPtr)) { + + char* segment = + (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr; + constructEntry_t* constructEntryPtr; + long j; + ulong_t startHash; + bool_t status; + + /* Find an empty constructEntries entry */ + TM_BEGIN(); + while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) { + entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */ + } + constructEntryPtr = &constructEntries[entryIndex]; + 
TM_SHARED_WRITE_P(constructEntryPtr->segment, segment); + TM_END(); + entryIndex = (entryIndex + 1) % numUniqueSegment; + + /* + * Save hashes (sdbm algorithm) of segment substrings + * + * endHashes will be computed for shorter substrings after matches + * have been made (in the next phase of the code). This will reduce + * the number of substrings for which hashes need to be computed. + * + * Since we can compute startHashes incrementally, we go ahead + * and compute all of them here. + */ + /* constructEntryPtr is local now */ + constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]); + + startHash = 0; + for (j = 1; j < segmentLength; j++) { + startHash = (ulong_t)segment[j-1] + + (startHash << 6) + (startHash << 16) - startHash; + TM_BEGIN(); + status = TMTABLE_INSERT(startHashToConstructEntryTables[j], + (ulong_t)startHash, + (void*)constructEntryPtr ); + TM_END(); + assert(status); + } + + /* + * For looking up construct entries quickly + */ + startHash = (ulong_t)segment[j-1] + + (startHash << 6) + (startHash << 16) - startHash; + TM_BEGIN(); + status = TMTABLE_INSERT(hashToConstructEntryTable, + (ulong_t)startHash, + (void*)constructEntryPtr); + TM_END(); + assert(status); + } + } + + thread_barrier_wait(); + + /* + * Step 2b: Match ends to starts by using hash-based string comparison. 
+ */ + for (substringLength = segmentLength-1; substringLength > 0; substringLength--) { + + table_t* startHashToConstructEntryTablePtr = + startHashToConstructEntryTables[substringLength]; + list_t** buckets = startHashToConstructEntryTablePtr->buckets; + long numBucket = startHashToConstructEntryTablePtr->numBucket; + + long index_start; + long index_stop; + +#if defined(HTM) || defined(STM) + { + /* Choose disjoint segments [index_start,index_stop) for each thread */ + long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ + index_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + index_stop = numUniqueSegment; + } else { + index_stop = index_start + partitionSize; + } + } +#else /* !(HTM || STM) */ + index_start = 0; + index_stop = numUniqueSegment; +#endif /* !(HTM || STM) */ + + /* Iterating over disjoint itervals in the range [0, numUniqueSegment) */ + for (entryIndex = index_start; + entryIndex < index_stop; + entryIndex += endInfoEntries[entryIndex].jumpToNext) + { + if (!endInfoEntries[entryIndex].isEnd) { + continue; + } + + /* ConstructEntries[entryIndex] is local data */ + constructEntry_t* endConstructEntryPtr = + &constructEntries[entryIndex]; + char* endSegment = endConstructEntryPtr->segment; + ulong_t endHash = endConstructEntryPtr->endHash; + + list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */ + list_iter_t it; + list_iter_reset(&it, chainPtr); + + /* Linked list at chainPtr is constant */ + while (list_iter_hasNext(&it, chainPtr)) { + + constructEntry_t* startConstructEntryPtr = + (constructEntry_t*)list_iter_next(&it, chainPtr); + char* startSegment = startConstructEntryPtr->segment; + long newLength = 0; + + /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */ + TM_BEGIN(); + + /* Check if matches */ + if (TM_SHARED_READ(startConstructEntryPtr->isStart) && + (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) 
&& + (strncmp(startSegment, + &endSegment[segmentLength - substringLength], + substringLength) == 0)) + { + TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE); + + constructEntry_t* startConstructEntry_endPtr; + constructEntry_t* endConstructEntry_startPtr; + + /* Update endInfo (appended something so no longer end) */ + TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE); + + /* Update segment chain construct info */ + startConstructEntry_endPtr = + (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr); + endConstructEntry_startPtr = + (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr); + + assert(startConstructEntry_endPtr); + assert(endConstructEntry_startPtr); + TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr, + endConstructEntry_startPtr); + TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr, + startConstructEntryPtr); + TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr, + startConstructEntry_endPtr); + TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength); + newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) + + (long)TM_SHARED_READ(startConstructEntryPtr->length) - + substringLength; + TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength); + } /* if (matched) */ + + TM_END(); + + if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */ + break; + } + } /* iterate over chain */ + + } /* for (endIndex < numUniqueSegment) */ + + thread_barrier_wait(); + + /* + * Step 2c: Update jump values and hashes + * + * endHash entries of all remaining ends are updated to the next + * substringLength. Additionally jumpToNext entries are updated such + * that they allow to skip non-end entries. Currently this is sequential + * because parallelization did not perform better. +. 
*/ + + if (threadId == 0) { + if (substringLength > 1) { + long index = segmentLength - substringLength + 1; + /* initialization if j and i: with i being the next end after j=0 */ + for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) { + /* find first non-null */ + } + /* entry 0 is handled seperately from the loop below */ + endInfoEntries[0].jumpToNext = i; + if (endInfoEntries[0].isEnd) { + constructEntry_t* constructEntryPtr = &constructEntries[0]; + char* segment = constructEntryPtr->segment; + constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); + } + /* Continue scanning (do not reset i) */ + for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) { + if (endInfoEntries[i].isEnd) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + char* segment = constructEntryPtr->segment; + constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); + endInfoEntries[j].jumpToNext = MAX(1, (i - j)); + j = i; + } + } + endInfoEntries[j].jumpToNext = i - j; + } + } + + thread_barrier_wait(); + + } /* for (substringLength > 0) */ + + + thread_barrier_wait(); + + /* + * Step 3: Build sequence string + */ + if (threadId == 0) { + + long totalLength = 0; + + for (i = 0; i < numUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + if (constructEntryPtr->isStart) { + totalLength += constructEntryPtr->length; + } + } + + sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char)); + char* sequence = sequencerPtr->sequence; + assert(sequence); + + char* copyPtr = sequence; + long sequenceLength = 0; + + for (i = 0; i < numUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + /* If there are several start segments, we append in arbitrary order */ + if (constructEntryPtr->isStart) { + long newSequenceLength = sequenceLength + constructEntryPtr->length; + assert( newSequenceLength <= totalLength ); + copyPtr = sequence + sequenceLength; + 
sequenceLength = newSequenceLength; + do { + long numChar = segmentLength - constructEntryPtr->overlap; + if ((copyPtr + numChar) > (sequence + newSequenceLength)) { + TM_PRINT0("ERROR: sequence length != actual length\n"); + break; + } + memcpy(copyPtr, + constructEntryPtr->segment, + (numChar * sizeof(char))); + copyPtr += numChar; + } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL); + assert(copyPtr <= (sequence + sequenceLength)); + } + } + + assert(sequence != NULL); + sequence[sequenceLength] = '\0'; + } + + TM_THREAD_EXIT(); + + } + + + private class endInfoEntry { + boolean isEnd; + long jumpToNext; + } + + private class constructEntry { + boolean isStart; + String segment; + long endHash; + constructEntry startPtr; + constructEntry nextPtr; + constructEntry endPtr; + long overlap; + long length; + } + + private class sequencer_run_arg { + Sequencer sequencerPtr; + Segments segmentsPtr; + long preAllocLength; + String returnSequence; /* variable stores return value */ + } + /* ============================================================================= + * hashString + * -- uses sdbm hash function + * ============================================================================= + */ + static long hashString (String str) + { + long hash = 0; + long c; + + /* Note: Do not change this hashing scheme */ + while ((c = str++) != '\0') { + hash = c + (hash << 6) + (hash << 16) - hash; + } + + return (long)hash; + } + + + /* ============================================================================= + * hashSegment + * -- For hashtable + * ============================================================================= + */ + static long hashSegment (string keyPtr) + { + return (long)hash_sdbm(keyPtr); /* can be any "good" hash function */ + } + + + /* ============================================================================= + * compareSegment + * -- For hashtable + * ============================================================================= 
+ */ + static long compareSegment (pair_t* a, pair_t* b) + { + return strcmp((char*)(a->firstPtr), (char*)(b->firstPtr)); + } + +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Table.java b/Robust/src/Benchmarks/SingleTM/genome/java/Table.java new file mode 100644 index 00000000..83893a87 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Table.java @@ -0,0 +1,57 @@ +public class Table { + + LinkedList buckets[]; + long numBucket; + + + /* ============================================================================= + * table_alloc + * -- Returns NULL on failure + * ============================================================================= + */ + Table (long myNumBucket) { + + long i; + + buckets = new LinkedList[myNumBucket]; + + numBucket = myNumBucket; + + } + + + /* ============================================================================= + * table_insert + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ + boolean table_insert (long hash, void* dataPtr) { + long i = hash % numBucket; + + if(buckets[i].indexOf(dataPtr) != -1) { + return FALSE; + } + + buckets[i].add(dataPtr); + + return TRUE; + } + + /* ============================================================================= + * table_remove + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ + boolean table_remove (long hash, void* dataPtr) { + + long i = hash % numBucket; + + if (!buckets[i].remove(dataPtr) { + return FALSE; + } + + return TRUE; + + } + +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/Table.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/Table.java~ new file mode 100644 index 00000000..28b2ed5f --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/Table.java~ @@ -0,0 +1,57 @@ +public class Table { + + LinkedList buckets[]; + long numBucket; + + + /* 
============================================================================= + * table_alloc + * -- Returns NULL on failure + * ============================================================================= + */ + Table (long myNumBucket) { + + long i; + + buckets = new LinkedList[myNumBucket]; + + numBucket = myNumBucket; + + } + + + /* ============================================================================= + * table_insert + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ + boolean table_insert (long hash, void* dataPtr) { + long i = hash % numBucket; + + if(buckets[i].indexOf(dataPtr) != -1) { + return FALSE; + } + + buckets[i].add(dataPtr); + + return TRUE; + } + + /* ============================================================================= + * table_remove + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ + boolean table_remove (long hash, void* dataPtr) { + + long i = hash % numBucket; + + if (!list_remove(tablePtr->buckets[i], dataPtr)) { + return FALSE; + } + + return TRUE; + + } + +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/java/genome.java~ b/Robust/src/Benchmarks/SingleTM/genome/java/genome.java~ new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/java/genome.java~ @@ -0,0 +1 @@ + diff --git a/Robust/src/Benchmarks/SingleTM/genome/nucleotide.h b/Robust/src/Benchmarks/SingleTM/genome/nucleotide.h new file mode 100644 index 00000000..c7b5ed4e --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/nucleotide.h @@ -0,0 +1,93 @@ +/* ============================================================================= + * + * nucleotide.h + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. 
+ * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + + +#ifndef NUCLEOTIDE_H +#define NUCLEOTIDE_H 1 + + +typedef enum nucleotide_type { + NUCLEOTIDE_ADENINE = 'a', + NUCLEOTIDE_CYTOSINE = 'c', + NUCLEOTIDE_GUANINE = 'g', + NUCLEOTIDE_THYMINE = 't', + NUCLEOTIDE_NUM_TYPE = 4 +} nucleotide_type_t; + + +#endif /* NUCLEOTIDE_H */ + + +/* ============================================================================= + * + * End of nucleotide.h + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/segments.c b/Robust/src/Benchmarks/SingleTM/genome/segments.c new file mode 100644 index 00000000..e89271d9 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/segments.c @@ -0,0 +1,318 @@ +/* ============================================================================= + * + * segments.c + * -- Create random segments from random gene + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. 
+ * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + + +#include +#include +#include +#include "gene.h" +#include "random.h" +#include "segments.h" +#include "utility.h" +#include "vector.h" + + +/* ============================================================================= + * segments_alloc + * -- Does almost all the memory allocation for random segments + * -- The actual number of segments created by 'segments_create' may be larger + * than 'minNum' to ensure the segments overlap and cover the entire gene + * -- Returns NULL on failure + * ============================================================================= + */ +segments_t* +segments_alloc (long length, long minNum) +{ + segments_t* segmentsPtr; + long i; + char* string; + + segmentsPtr = (segments_t*)malloc(sizeof(segments_t)); + if (segmentsPtr == NULL) { + return NULL; + } + + /* Preallocate for the min number of segments we will need */ + segmentsPtr->strings = (char**)malloc(minNum * sizeof(char*)); + if (segmentsPtr->strings == NULL) { + return NULL; + } + + string = (char*)malloc(minNum * (length+1) * sizeof(char)); + if (string == NULL) { + return NULL; + } + for (i = 0; i < minNum; i++) { + 
segmentsPtr->strings[i] = &string[i * (length+1)]; + segmentsPtr->strings[i][length] = '\0'; + } + segmentsPtr->minNum = minNum; + segmentsPtr->length = length; + + segmentsPtr->contentsPtr = vector_alloc(minNum); + if (segmentsPtr->contentsPtr == NULL) { + return NULL; + } + + return segmentsPtr; +} + + +/* ============================================================================= + * segments_create + * -- Populates 'contentsPtr' + * ============================================================================= + */ +void +segments_create (segments_t* segmentsPtr, gene_t* genePtr, random_t* randomPtr) +{ + vector_t* segmentsContentsPtr; + char** strings; + long segmentLength; + long minNumSegment; + char* geneString; + long geneLength; + bitmap_t* startBitmapPtr; + long numStart; + long i; + long maxZeroRunLength; + + assert(segmentsPtr != NULL); + assert(genePtr != NULL); + assert(randomPtr != NULL); + + segmentsContentsPtr = segmentsPtr->contentsPtr; + strings = segmentsPtr->strings; + segmentLength = segmentsPtr->length; + minNumSegment = segmentsPtr->minNum; + + geneString = genePtr->contents; + geneLength = genePtr->length; + startBitmapPtr = genePtr->startBitmapPtr; + numStart = geneLength - segmentLength + 1; + + /* Pick some random segments to start */ + for (i = 0; i < minNumSegment; i++) { + long j = (long)(random_generate(randomPtr) % numStart); + bool_t status = bitmap_set(startBitmapPtr, j); + assert(status); + memcpy(strings[i], &(geneString[j]), segmentLength * sizeof(char)); + status = vector_pushBack(segmentsContentsPtr, (void*)strings[i]); + assert(status); + } + + /* Make sure segment covers start */ + i = 0; + if (!bitmap_isSet(startBitmapPtr, i)) { + char* string = (char*)malloc((segmentLength+1) * sizeof(char)); + string[segmentLength] = '\0'; + memcpy(string, &(geneString[i]), segmentLength * sizeof(char)); + bool_t status = vector_pushBack(segmentsContentsPtr, (void*)string); + assert(status); + status = bitmap_set(startBitmapPtr, i); + 
assert(status); + } + + /* Add extra segments to fill holes and ensure overlap */ + maxZeroRunLength = segmentLength - 1; + for (i = 0; i < numStart; i++) { + long i_stop = MIN((i+maxZeroRunLength), numStart); + for ( /* continue */; i < i_stop; i++) { + if (bitmap_isSet(startBitmapPtr, i)) { + break; + } + } + if (i == i_stop) { + /* Found big enough hole */ + char* string = (char*)malloc((segmentLength+1) * sizeof(char)); + string[segmentLength] = '\0'; + i = i - 1; + memcpy(string, &(geneString[i]), segmentLength * sizeof(char)); + bool_t status = vector_pushBack(segmentsContentsPtr, (void*)string); + assert(status); + status = bitmap_set(startBitmapPtr, i); + assert(status); + } + } +} + + +/* ============================================================================= + * segments_free + * ============================================================================= + */ +void +segments_free (segments_t* segmentsPtr) +{ + free(vector_at(segmentsPtr->contentsPtr, 0)); + vector_free(segmentsPtr->contentsPtr); + free(segmentsPtr->strings); + free(segmentsPtr); +} + + +/* ============================================================================= + * TEST_SEGMENTS + * ============================================================================= + */ +#ifdef TEST_SEGMENTS + + +#include +#include +#include +#include "types.h" + + +static void +tester (long geneLength, long segmentLength, long minNumSegment, bool_t doPrint) +{ + gene_t* genePtr; + segments_t* segmentsPtr; + random_t* randomPtr; + bitmap_t* startBitmapPtr; + long i; + long j; + + genePtr = gene_alloc(geneLength); + segmentsPtr = segments_alloc(segmentLength, minNumSegment); + randomPtr = random_alloc(); + startBitmapPtr = bitmap_alloc(geneLength); + + random_seed(randomPtr, 0); + gene_create(genePtr, randomPtr); + random_seed(randomPtr, 0); + segments_create(segmentsPtr, genePtr, randomPtr); + + assert(segmentsPtr->minNum == minNumSegment); + assert(vector_getSize(segmentsPtr->contentsPtr) >= 
minNumSegment); + + if (doPrint) { + printf("Gene = %s\n", genePtr->contents); + } + + /* Check that each segment occurs in gene */ + for (i = 0; i < vector_getSize(segmentsPtr->contentsPtr); i++) { + char *charPtr = strstr(genePtr->contents, + (char*)vector_at(segmentsPtr->contentsPtr, i)); + assert(charPtr != NULL); + j = charPtr - genePtr->contents; + bitmap_set(startBitmapPtr, j); + if (doPrint) { + printf("Segment %li (@%li) = %s\n", + i, j, (char*)vector_at(segmentsPtr->contentsPtr, i)); + } + } + + /* Check that there is complete overlap */ + assert(bitmap_isSet(startBitmapPtr, 0)); + for (i = 0, j = 0; i < geneLength; i++ ) { + if (bitmap_isSet(startBitmapPtr, i)) { + assert((i-j-1) < segmentLength); + j = i; + } + } + + gene_free(genePtr); + segments_free(segmentsPtr); + random_free(randomPtr); + bitmap_free(startBitmapPtr); +} + + +int +main () +{ + bool_t status = memory_init(1, 4, 2) + assert(status); + + puts("Starting..."); + + tester(10, 4, 20, TRUE); + tester(20, 5, 1, TRUE); + tester(100, 10, 1000, FALSE); + tester(100, 10, 1, FALSE); + + puts("All tests passed."); + + return 0; +} + + +#endif /* TEST_SEGMENTS */ + + +/* ============================================================================= + * + * End of segments.c + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/segments.h b/Robust/src/Benchmarks/SingleTM/genome/segments.h new file mode 100644 index 00000000..bf8473f8 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/segments.h @@ -0,0 +1,128 @@ +/* ============================================================================= + * + * segments.h + * -- Create random segments from random gene + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. 
+ * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + + +#ifndef SEGMENTS_H +#define SEGMENTS_H 1 + + +#include "gene.h" +#include "random.h" +#include "vector.h" + + +typedef struct segments { + long length; + long minNum; + vector_t* contentsPtr; +/* private: */ + char** strings; +} segments_t; + + +/* ============================================================================= + * segments_alloc + * -- Does almost all the memory allocation for random segments + * -- The actual number of segments created by 'segments_create' may be larger + * than 'minNum' to ensure the segments overlap and cover the entire gene + * -- Returns NULL on failure + * ============================================================================= + */ +segments_t* +segments_alloc (long length, long minNum); + + +/* ============================================================================= + * segments_create + * -- Populates 'contentsPtr' + * ============================================================================= + */ +void +segments_create (segments_t* segmentsPtr, gene_t* genePtr, random_t* randomPtr); + + +/* 
============================================================================= + * segments_free + * ============================================================================= + */ +void +segments_free (segments_t* segmentsPtr); + + +#endif /* SEGMENTS_H */ + + +/* ============================================================================= + * + * End of segments.h + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/segments.o b/Robust/src/Benchmarks/SingleTM/genome/segments.o new file mode 100644 index 00000000..03bc11b3 Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/segments.o differ diff --git a/Robust/src/Benchmarks/SingleTM/genome/sequencer.c b/Robust/src/Benchmarks/SingleTM/genome/sequencer.c new file mode 100644 index 00000000..558d45ab --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/sequencer.c @@ -0,0 +1,942 @@ +/* ============================================================================= + * + * sequencer.c + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * Algorithm overview: + * + * 1) Remove duplicate segments by using hash-set + * 2) Match segments using hash-based comparisons + * - Cycles are prevented by tracking starts/ends of matched chains + * 3) Build sequence + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. 
+ * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + + +#include "tm.h" + +#include +#include +#include +#include "hash.h" +#include "hashtable.h" +#include "segments.h" +#include "sequencer.h" +#include "table.h" +#include "thread.h" +#include "utility.h" +#include "vector.h" +#include "types.h" + + +struct endInfoEntry { + bool_t isEnd; + long jumpToNext; +}; + +struct constructEntry { + bool_t isStart; + char* segment; + ulong_t endHash; + struct constructEntry* startPtr; + struct constructEntry* nextPtr; + struct constructEntry* endPtr; + long overlap; + long length; +}; + + +/* ============================================================================= + * hashString + * -- uses sdbm hash function + * ============================================================================= + */ +static ulong_t +hashString (char* str) +{ + ulong_t hash = 0; + long c; + + /* Note: Do not change this hashing scheme */ + while ((c = *str++) != '\0') { + hash = c + (hash << 6) + (hash << 16) - hash; + } + + return (ulong_t)hash; +} + + +/* ============================================================================= + * hashSegment + * -- For hashtable + * ============================================================================= + */ +static ulong_t +hashSegment (const void* keyPtr) +{ + return (ulong_t)hash_sdbm((char*)keyPtr); /* can be any "good" hash function */ +} + + +/* 
============================================================================= + * compareSegment + * -- For hashtable + * ============================================================================= + */ +static long +compareSegment (const pair_t* a, const pair_t* b) +{ + return strcmp((char*)(a->firstPtr), (char*)(b->firstPtr)); +} + + +/* ============================================================================= + * sequencer_alloc + * -- Returns NULL on failure + * ============================================================================= + */ +sequencer_t* +sequencer_alloc (long geneLength, long segmentLength, segments_t* segmentsPtr) +{ + sequencer_t* sequencerPtr; + long maxNumUniqueSegment = geneLength - segmentLength + 1; + long i; + + sequencerPtr = (sequencer_t*)malloc(sizeof(sequencer_t)); + if (sequencerPtr == NULL) { + return NULL; + } + + sequencerPtr->uniqueSegmentsPtr = + hashtable_alloc(geneLength, &hashSegment, &compareSegment, -1, -1); + if (sequencerPtr->uniqueSegmentsPtr == NULL) { + return NULL; + } + + /* For finding a matching entry */ + sequencerPtr->endInfoEntries = + (endInfoEntry_t*)malloc(maxNumUniqueSegment * sizeof(endInfoEntry_t)); + for (i = 0; i < maxNumUniqueSegment; i++) { + endInfoEntry_t* endInfoEntryPtr = &sequencerPtr->endInfoEntries[i]; + endInfoEntryPtr->isEnd = TRUE; + endInfoEntryPtr->jumpToNext = 1; + } + sequencerPtr->startHashToConstructEntryTables = + (table_t**)malloc(segmentLength * sizeof(table_t*)); + if (sequencerPtr->startHashToConstructEntryTables == NULL) { + return NULL; + } + for (i = 1; i < segmentLength; i++) { /* 0 is dummy entry */ + sequencerPtr->startHashToConstructEntryTables[i] = + table_alloc(geneLength, NULL); + if (sequencerPtr->startHashToConstructEntryTables[i] == NULL) { + return NULL; + } + } + sequencerPtr->segmentLength = segmentLength; + + /* For constructing sequence */ + sequencerPtr->constructEntries = + (constructEntry_t*)malloc(maxNumUniqueSegment * sizeof(constructEntry_t)); + 
if (sequencerPtr->constructEntries == NULL) { + return NULL; + } + for (i= 0; i < maxNumUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &sequencerPtr->constructEntries[i]; + constructEntryPtr->isStart = TRUE; + constructEntryPtr->segment = NULL; + constructEntryPtr->endHash = 0; + constructEntryPtr->startPtr = constructEntryPtr; + constructEntryPtr->nextPtr = NULL; + constructEntryPtr->endPtr = constructEntryPtr; + constructEntryPtr->overlap = 0; + constructEntryPtr->length = segmentLength; + } + sequencerPtr->hashToConstructEntryTable = table_alloc(geneLength, NULL); + if (sequencerPtr->hashToConstructEntryTable == NULL) { + return NULL; + } + + sequencerPtr->segmentsPtr = segmentsPtr; + + return sequencerPtr; +} + + +/* ============================================================================= + * sequencer_run + * ============================================================================= + */ +void +sequencer_run (void* argPtr) +{ + TM_THREAD_ENTER(); + + long threadId = thread_getId(); + + sequencer_t* sequencerPtr = (sequencer_t*)argPtr; + + hashtable_t* uniqueSegmentsPtr; + endInfoEntry_t* endInfoEntries; + table_t** startHashToConstructEntryTables; + constructEntry_t* constructEntries; + table_t* hashToConstructEntryTable; + + uniqueSegmentsPtr = sequencerPtr->uniqueSegmentsPtr; + endInfoEntries = sequencerPtr->endInfoEntries; + startHashToConstructEntryTables = sequencerPtr->startHashToConstructEntryTables; + constructEntries = sequencerPtr->constructEntries; + hashToConstructEntryTable = sequencerPtr->hashToConstructEntryTable; + + segments_t* segmentsPtr = sequencerPtr->segmentsPtr; + assert(segmentsPtr); + vector_t* segmentsContentsPtr = segmentsPtr->contentsPtr; + long numSegment = vector_getSize(segmentsContentsPtr); + long segmentLength = segmentsPtr->length; + + long i; + long j; + long i_start; + long i_stop; + long numUniqueSegment; + long substringLength; + long entryIndex; + + /* + * Step 1: Remove duplicate segments + */ +#if 
defined(HTM) || defined(STM) + long numThread = thread_getNumThread(); + { + /* Choose disjoint segments [i_start,i_stop) for each thread */ + long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */ + i_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + i_stop = numSegment; + } else { + i_stop = i_start + partitionSize; + } + } +#else /* !(HTM || STM) */ + i_start = 0; + i_stop = numSegment; +#endif /* !(HTM || STM) */ + for (i = i_start; i < i_stop; i+=CHUNK_STEP1) { + TM_BEGIN(); + { + long ii; + long ii_stop = MIN(i_stop, (i+CHUNK_STEP1)); + for (ii = i; ii < ii_stop; ii++) { + void* segment = vector_at(segmentsContentsPtr, ii); + TMHASHTABLE_INSERT(uniqueSegmentsPtr, + segment, + segment); + } /* ii */ + } + TM_END(); + } + + thread_barrier_wait(); + + /* + * Step 2a: Iterate over unique segments and compute hashes. + * + * For the gene "atcg", the hashes for the end would be: + * + * "t", "tc", and "tcg" + * + * And for the gene "tcgg", the hashes for the start would be: + * + * "t", "tc", and "tcg" + * + * The names are "end" and "start" because if a matching pair is found, + * they are the substring of the end part of the pair and the start + * part of the pair respectively. 
In the above example, "tcg" is the + * matching substring so: + * + * (end) (start) + * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg") + */ + + /* uniqueSegmentsPtr is constant now */ + numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr); + entryIndex = 0; + +#if defined(HTM) || defined(STM) + { + /* Choose disjoint segments [i_start,i_stop) for each thread */ + long num = uniqueSegmentsPtr->numBucket; + long partitionSize = (num + numThread/2) / numThread; /* with rounding */ + i_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + i_stop = num; + } else { + i_stop = i_start + partitionSize; + } + } + { + /* Approximate disjoint segments of element allocation in constructEntries */ + long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ + entryIndex = threadId * partitionSize; + } +#else /* !(HTM || STM) */ + i_start = 0; + i_stop = uniqueSegmentsPtr->numBucket; + entryIndex = 0; +#endif /* !(HTM || STM) */ + + for (i = i_start; i < i_stop; i++) { + + list_t* chainPtr = uniqueSegmentsPtr->buckets[i]; + list_iter_t it; + list_iter_reset(&it, chainPtr); + + while (list_iter_hasNext(&it, chainPtr)) { + + char* segment = + (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr; + constructEntry_t* constructEntryPtr; + long j; + ulong_t startHash; + bool_t status; + + /* Find an empty constructEntries entry */ + TM_BEGIN(); + while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) { + entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */ + } + constructEntryPtr = &constructEntries[entryIndex]; + TM_SHARED_WRITE_P(constructEntryPtr->segment, segment); + TM_END(); + entryIndex = (entryIndex + 1) % numUniqueSegment; + + /* + * Save hashes (sdbm algorithm) of segment substrings + * + * endHashes will be computed for shorter substrings after matches + * have been made (in the next phase of the code). 
This will reduce + * the number of substrings for which hashes need to be computed. + * + * Since we can compute startHashes incrementally, we go ahead + * and compute all of them here. + */ + /* constructEntryPtr is local now */ + constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]); + + startHash = 0; + for (j = 1; j < segmentLength; j++) { + startHash = (ulong_t)segment[j-1] + + (startHash << 6) + (startHash << 16) - startHash; + TM_BEGIN(); + status = TMTABLE_INSERT(startHashToConstructEntryTables[j], + (ulong_t)startHash, + (void*)constructEntryPtr ); + TM_END(); + assert(status); + } + + /* + * For looking up construct entries quickly + */ + startHash = (ulong_t)segment[j-1] + + (startHash << 6) + (startHash << 16) - startHash; + TM_BEGIN(); + status = TMTABLE_INSERT(hashToConstructEntryTable, + (ulong_t)startHash, + (void*)constructEntryPtr); + TM_END(); + assert(status); + } + } + + thread_barrier_wait(); + + /* + * Step 2b: Match ends to starts by using hash-based string comparison. 
+ */ + for (substringLength = segmentLength-1; substringLength > 0; substringLength--) { + + table_t* startHashToConstructEntryTablePtr = + startHashToConstructEntryTables[substringLength]; + list_t** buckets = startHashToConstructEntryTablePtr->buckets; + long numBucket = startHashToConstructEntryTablePtr->numBucket; + + long index_start; + long index_stop; + +#if defined(HTM) || defined(STM) + { + /* Choose disjoint segments [index_start,index_stop) for each thread */ + long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ + index_start = threadId * partitionSize; + if (threadId == (numThread - 1)) { + index_stop = numUniqueSegment; + } else { + index_stop = index_start + partitionSize; + } + } +#else /* !(HTM || STM) */ + index_start = 0; + index_stop = numUniqueSegment; +#endif /* !(HTM || STM) */ + + /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */ + for (entryIndex = index_start; + entryIndex < index_stop; + entryIndex += endInfoEntries[entryIndex].jumpToNext) + { + if (!endInfoEntries[entryIndex].isEnd) { + continue; + } + + /* ConstructEntries[entryIndex] is local data */ + constructEntry_t* endConstructEntryPtr = + &constructEntries[entryIndex]; + char* endSegment = endConstructEntryPtr->segment; + ulong_t endHash = endConstructEntryPtr->endHash; + + list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */ + list_iter_t it; + list_iter_reset(&it, chainPtr); + + /* Linked list at chainPtr is constant */ + while (list_iter_hasNext(&it, chainPtr)) { + + constructEntry_t* startConstructEntryPtr = + (constructEntry_t*)list_iter_next(&it, chainPtr); + char* startSegment = startConstructEntryPtr->segment; + long newLength = 0; + + /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */ + TM_BEGIN(); + + /* Check if matches */ + if (TM_SHARED_READ(startConstructEntryPtr->isStart) && + (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) 
&& + (strncmp(startSegment, + &endSegment[segmentLength - substringLength], + substringLength) == 0)) + { + TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE); + + constructEntry_t* startConstructEntry_endPtr; + constructEntry_t* endConstructEntry_startPtr; + + /* Update endInfo (appended something so no longer end) */ + TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE); + + /* Update segment chain construct info */ + startConstructEntry_endPtr = + (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr); + endConstructEntry_startPtr = + (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr); + + assert(startConstructEntry_endPtr); + assert(endConstructEntry_startPtr); + TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr, + endConstructEntry_startPtr); + TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr, + startConstructEntryPtr); + TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr, + startConstructEntry_endPtr); + TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength); + newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) + + (long)TM_SHARED_READ(startConstructEntryPtr->length) - + substringLength; + TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength); + } /* if (matched) */ + + TM_END(); + + if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */ + break; + } + } /* iterate over chain */ + + } /* for (endIndex < numUniqueSegment) */ + + thread_barrier_wait(); + + /* + * Step 2c: Update jump values and hashes + * + * endHash entries of all remaining ends are updated to the next + * substringLength. Additionally jumpToNext entries are updated such + * that they allow to skip non-end entries. Currently this is sequential + * because parallelization did not perform better. +. 
*/ + + if (threadId == 0) { + if (substringLength > 1) { + long index = segmentLength - substringLength + 1; + /* initialization of j and i: with i being the next end after j=0 */ + for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) { + /* find first non-null */ + } + /* entry 0 is handled separately from the loop below */ + endInfoEntries[0].jumpToNext = i; + if (endInfoEntries[0].isEnd) { + constructEntry_t* constructEntryPtr = &constructEntries[0]; + char* segment = constructEntryPtr->segment; + constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); + } + /* Continue scanning (do not reset i) */ + for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) { + if (endInfoEntries[i].isEnd) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + char* segment = constructEntryPtr->segment; + constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); + endInfoEntries[j].jumpToNext = MAX(1, (i - j)); + j = i; + } + } + endInfoEntries[j].jumpToNext = i - j; + } + } + + thread_barrier_wait(); + + } /* for (substringLength > 0) */ + + + thread_barrier_wait(); + + /* + * Step 3: Build sequence string + */ + if (threadId == 0) { + + long totalLength = 0; + + for (i = 0; i < numUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + if (constructEntryPtr->isStart) { + totalLength += constructEntryPtr->length; + } + } + + sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char)); + char* sequence = sequencerPtr->sequence; + assert(sequence); + + char* copyPtr = sequence; + long sequenceLength = 0; + + for (i = 0; i < numUniqueSegment; i++) { + constructEntry_t* constructEntryPtr = &constructEntries[i]; + /* If there are several start segments, we append in arbitrary order */ + if (constructEntryPtr->isStart) { + long newSequenceLength = sequenceLength + constructEntryPtr->length; + assert( newSequenceLength <= totalLength ); + copyPtr = sequence + sequenceLength; + 
sequenceLength = newSequenceLength; + do { + long numChar = segmentLength - constructEntryPtr->overlap; + if ((copyPtr + numChar) > (sequence + newSequenceLength)) { + TM_PRINT0("ERROR: sequence length != actual length\n"); + break; + } + memcpy(copyPtr, + constructEntryPtr->segment, + (numChar * sizeof(char))); + copyPtr += numChar; + } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL); + assert(copyPtr <= (sequence + sequenceLength)); + } + } + + assert(sequence != NULL); + sequence[sequenceLength] = '\0'; + } + + TM_THREAD_EXIT(); +} + + +/* ============================================================================= + * sequencer_free + * ============================================================================= + */ +void +sequencer_free (sequencer_t* sequencerPtr) +{ + long i; + + table_free(sequencerPtr->hashToConstructEntryTable); + free(sequencerPtr->constructEntries); + for (i = 1; i < sequencerPtr->segmentLength; i++) { + table_free(sequencerPtr->startHashToConstructEntryTables[i]); + } + free(sequencerPtr->startHashToConstructEntryTables); + free(sequencerPtr->endInfoEntries); +#if 0 + /* TODO: fix mixed sequential/parallel allocation */ + hashtable_free(sequencerPtr->uniqueSegmentsPtr); + if (sequencerPtr->sequence != NULL) { + free(sequencerPtr->sequence); + } +#endif + free(sequencerPtr); +} + + +/* ============================================================================= + * TEST_SEQUENCER + * ============================================================================= + */ +#ifdef TEST_SEQUENCER + + +#include +#include +#include "segments.h" + + +char* gene1 = "gatcggcagc"; +char* segments1[] = { + "atcg", + "gcag", + "tcgg", + "cagc", + "gatc", + NULL +}; + +char* gene2 = "aaagc"; +char* segments2[] = { + "aaa", + "aag", + "agc", + NULL +}; + +char* gene3 = "aaacaaagaaat"; +char* segments3[] = { + "aaac", + "aaag", + "aaat", + NULL +}; + +char* gene4 = "ttggctacgtatcgcacggt"; +char* segments4[] = { + "cgtatcgc", + 
"tcgcacgg", + "gtatcgca", + "tatcgcac", + "atcgcacg", + "ttggctac", + "ctacgtat", + "acgtatcg", + "ctacgtat", + "cgtatcgc", + "atcgcacg", + "ggctacgt", + "tacgtatc", + "tcgcacgg", + "ttggctac", + "ggctacgt", + "atcgcacg", + "tatcgcac", + "cgtatcgc", + "acgtatcg", + "gtatcgca", + "gtatcgca", + "cgcacggt", + "tatcgcac", + "ttggctac", + "atcgcacg", + "acgtatcg", + "gtatcgca", + "ttggctac", + "tggctacg", + NULL +}; + +char* gene5 = "gatcggcagctggtacggcg"; +char* segments5[] = { + "atcggcag", + "gtacggcg", + "gatcggca", + "cagctggt", + "tggtacgg", + "gatcggca", + "gatcggca", + "tcggcagc", + "ggtacggc", + "tggtacgg", + "tcggcagc", + "gcagctgg", + "gatcggca", + "gctggtac", + "gatcggca", + "ctggtacg", + "ggcagctg", + "tcggcagc", + "gtacggcg", + "gcagctgg", + "ggcagctg", + "tcggcagc", + "cagctggt", + "tggtacgg", + "cagctggt", + "gcagctgg", + "gctggtac", + "cggcagct", + "agctggta", + "ctggtacg", + NULL +}; + +char* gene6 = "ttggtgagccgtaagactcc"; +char* segments6[] = { + "cgtaagac", + "taagactc", + "gtgagccg", + "gagccgta", + "gccgtaag", + "tgagccgt", + "gccgtaag", + "cgtaagac", + "ttggtgag", + "agccgtaa", + "gccgtaag", + "aagactcc", + "ggtgagcc", + "ttggtgag", + "agccgtaa", + "gagccgta", + "aagactcc", + "ttggtgag", + "gtaagact", + "ccgtaaga", + "ttggtgag", + "gagccgta", + "ggtgagcc", + "gagccgta", + "gccgtaag", + "aagactcc", + "gtaagact", + "ccgtaaga", + "tgagccgt", + "ttggtgag", + NULL +}; + +char* gene7 = "gatcggcagctggtacggcg"; +char* segments7[] = { + "atcggcag", + "gtacggcg", + "gatcggca", + "cagctggt", + "tggtacgg", + "gatcggca", + "gatcggca", + "tcggcagc", + "ggtacggc", + "tggtacgg", + "tcggcagc", + "gcagctgg", + "gatcggca", + "gctggtac", + "gatcggca", + "ctggtacg", + "ggcagctg", + "tcggcagc", + "gtacggcg", + "gcagctgg", + "ggcagctg", + "tcggcagc", + "cagctggt", + "tggtacgg", + "cagctggt", + "gcagctgg", + "gctggtac", + "cggcagct", + "agctggta", + "ctggtacg", + NULL +}; + +char* gene8 = "ttggtgagccgtaagactcc"; +char* segments8[] = { + "cgtaagac", + "taagactc", + 
"gtgagccg", + "gagccgta", + "gccgtaag", + "tgagccgt", + "gccgtaag", + "cgtaagac", + "ttggtgag", + "agccgtaa", + "gccgtaag", + "aagactcc", + "ggtgagcc", + "ttggtgag", + "agccgtaa", + "gagccgta", + "aagactcc", + "ttggtgag", + "gtaagact", + "ccgtaaga", + "ttggtgag", + "gagccgta", + "ggtgagcc", + "gagccgta", + "gccgtaag", + "aagactcc", + "gtaagact", + "ccgtaaga", + "tgagccgt", + "ttggtgag", + NULL +}; + + +static segments_t* +createSegments (char* segments[]) +{ + long i = 0; + segments_t* segmentsPtr = (segments_t*)malloc(sizeof(segments)); + + segmentsPtr->length = strlen(segments[0]); + segmentsPtr->contentsPtr = vector_alloc(1); + + while (segments[i] != NULL) { + bool_t status = vector_pushBack(segmentsPtr->contentsPtr, + (void*)segments[i]); + assert(status); + i++; + } + + segmentsPtr->minNum = vector_getSize(segmentsPtr->contentsPtr); + + return segmentsPtr; +} + + +static void +tester (char* gene, char* segments[]) +{ + segments_t* segmentsPtr; + sequencer_t* sequencerPtr; + + segmentsPtr = createSegments(segments); + sequencerPtr = sequencer_alloc(strlen(gene), segmentsPtr->length, segmentsPtr); + + sequencer_run((void*)sequencerPtr); + + printf("gene = %s\n", gene); + printf("sequence = %s\n", sequencerPtr->sequence); + assert(strcmp(sequencerPtr->sequence, gene) == 0); + + sequencer_free(sequencerPtr); +} + + +int +main () +{ + bool_t status = memory_init(1, 4, 2); + assert(status); + thread_startup(1); + + puts("Starting..."); + + /* Simple test */ + tester(gene1, segments1); + + /* Simple test with aliasing segments */ + tester(gene2, segments2); + + /* Simple test with non-overlapping segments */ + tester(gene3, segments3); + + /* Complex tests */ + tester(gene4, segments4); + tester(gene5, segments5); + tester(gene6, segments6); + tester(gene7, segments7); + tester(gene8, segments8); + + puts("Passed all tests."); + + return 0; +} + + +#endif /* TEST_SEQUENCER */ + + +/* ============================================================================= + * + 
* End of sequencer.c + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/sequencer.h b/Robust/src/Benchmarks/SingleTM/genome/sequencer.h new file mode 100644 index 00000000..c624d86b --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/sequencer.h @@ -0,0 +1,155 @@ +/* ============================================================================= + * + * sequencer.h + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ============================================================================= + */ + + +#ifndef SEQUENCER_H +#define SEQUENCER_H 1 + + +#include "hashtable.h" +#include "segments.h" +#include "table.h" +#include "tm.h" + + +typedef struct endInfoEntry endInfoEntry_t; +typedef struct constructEntry constructEntry_t; + + +typedef struct sequencer { + +/* public: */ + + char* sequence; + +/* private: */ + + segments_t* segmentsPtr; + + /* For removing duplicate segments */ + hashtable_t* uniqueSegmentsPtr; + + /* For matching segments */ + endInfoEntry_t* endInfoEntries; + table_t** startHashToConstructEntryTables; + + /* For constructing sequence */ + constructEntry_t* constructEntries; + table_t* hashToConstructEntryTable; + + /* For deallocation */ + long segmentLength; + +} sequencer_t; + + +typedef struct sequencer_run_arg { + sequencer_t* sequencerPtr; + segments_t* segmentsPtr; + long preAllocLength; + char* returnSequence; /* variable stores return value */ +} sequencer_run_arg_t; + + +/* ============================================================================= + * sequencer_alloc + * -- Returns NULL on failure + * ============================================================================= + */ +sequencer_t* +sequencer_alloc (long geneLength, long segmentLength, segments_t* segmentsPtr); + + +/* ============================================================================= + * sequencer_run + * ============================================================================= + */ + +void +sequencer_run (void* argPtr); + + +/* ============================================================================= + * sequencer_free + * ============================================================================= + */ +void +sequencer_free (sequencer_t* sequencerPtr); + + +#endif /* SEQUENCER_H */ + + +/* ============================================================================= + * + * End of sequencer.h + * + * 
============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/sequencer.h~ b/Robust/src/Benchmarks/SingleTM/genome/sequencer.h~ new file mode 100644 index 00000000..c624d86b --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/sequencer.h~ @@ -0,0 +1,155 @@ +/* ============================================================================= + * + * sequencer.h + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ============================================================================= + */ + + +#ifndef SEQUENCER_H +#define SEQUENCER_H 1 + + +#include "hashtable.h" +#include "segments.h" +#include "table.h" +#include "tm.h" + + +typedef struct endInfoEntry endInfoEntry_t; +typedef struct constructEntry constructEntry_t; + + +typedef struct sequencer { + +/* public: */ + + char* sequence; + +/* private: */ + + segments_t* segmentsPtr; + + /* For removing duplicate segments */ + hashtable_t* uniqueSegmentsPtr; + + /* For matching segments */ + endInfoEntry_t* endInfoEntries; + table_t** startHashToConstructEntryTables; + + /* For constructing sequence */ + constructEntry_t* constructEntries; + table_t* hashToConstructEntryTable; + + /* For deallocation */ + long segmentLength; + +} sequencer_t; + + +typedef struct sequencer_run_arg { + sequencer_t* sequencerPtr; + segments_t* segmentsPtr; + long preAllocLength; + char* returnSequence; /* variable stores return value */ +} sequencer_run_arg_t; + + +/* ============================================================================= + * sequencer_alloc + * -- Returns NULL on failure + * ============================================================================= + */ +sequencer_t* +sequencer_alloc (long geneLength, long segmentLength, segments_t* segmentsPtr); + + +/* ============================================================================= + * sequencer_run + * ============================================================================= + */ + +void +sequencer_run (void* argPtr); + + +/* ============================================================================= + * sequencer_free + * ============================================================================= + */ +void +sequencer_free (sequencer_t* sequencerPtr); + + +#endif /* SEQUENCER_H */ + + +/* ============================================================================= + * + * End of sequencer.h + * + * 
============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/sequencer.o b/Robust/src/Benchmarks/SingleTM/genome/sequencer.o new file mode 100644 index 00000000..b6aae4a1 Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/sequencer.o differ diff --git a/Robust/src/Benchmarks/SingleTM/genome/staticTest b/Robust/src/Benchmarks/SingleTM/genome/staticTest new file mode 100755 index 00000000..08b4cfe6 Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/staticTest differ diff --git a/Robust/src/Benchmarks/SingleTM/genome/staticTest.c b/Robust/src/Benchmarks/SingleTM/genome/staticTest.c new file mode 100644 index 00000000..4669673b --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/staticTest.c @@ -0,0 +1,15 @@ +#include <stdio.h> + +void function() { + static int myInt = 1; + printf("myInt:%d\n", myInt); + myInt *= 2; + printf("myInt:%d\n", myInt); +} + +int main(int argc,char *argv[]) +{ + function(); + function(); + function(); +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/staticTest.c~ b/Robust/src/Benchmarks/SingleTM/genome/staticTest.c~ new file mode 100644 index 00000000..ca5630cb --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/staticTest.c~ @@ -0,0 +1,15 @@ +#include <stdio.h> + +void function() { + static int myInt = 0; + printf("myInt:%d\n", myInt); + myInt *= 2; + printf("myInt:%d\n", myInt); +} + +int main(int argc,char *argv[]) +{ + function(); + function(); + function(); +} diff --git a/Robust/src/Benchmarks/SingleTM/genome/table.c b/Robust/src/Benchmarks/SingleTM/genome/table.c new file mode 100644 index 00000000..d4e71d86 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/table.c @@ -0,0 +1,263 @@ +/* ============================================================================= + * + * table.c + * -- Fixed-size hash table + * + * ============================================================================= + * + * Copyright (C) Stanford 
University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + + +#include <assert.h> +#include <stdlib.h> +#include "list.h" +#include "table.h" +#include "types.h" + + +/* ============================================================================= + * table_alloc + * -- Returns NULL on failure + * ============================================================================= + */ +table_t* +table_alloc (long numBucket, long (*compare)(const void*, const void*)) +{ + table_t* tablePtr; + long i; + + tablePtr = (table_t*)malloc(sizeof(table_t)); + if (tablePtr == NULL) { + return NULL; + } + + tablePtr->buckets = (list_t**)malloc(numBucket * sizeof(list_t*)); + if (tablePtr->buckets == NULL) { + return NULL; + } + + for (i = 0; i < numBucket; i++) { + tablePtr->buckets[i] = list_alloc(compare); + if (tablePtr->buckets[i] == NULL) { + return NULL; + } + } + + tablePtr->numBucket = numBucket; + + return tablePtr; +} + + +/* ============================================================================= + * table_insert + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ +bool_t +table_insert (table_t* tablePtr, ulong_t hash, 
void* dataPtr) +{ + long i = hash % tablePtr->numBucket; + + if (!list_insert(tablePtr->buckets[i], dataPtr)) { + return FALSE; + } + + return TRUE; +} + + +/* ============================================================================= + * TMtable_insert + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ +bool_t +TMtable_insert (TM_ARGDECL table_t* tablePtr, ulong_t hash, void* dataPtr) +{ + long i = hash % tablePtr->numBucket; + + if (!TMLIST_INSERT(tablePtr->buckets[i], dataPtr)) { + return FALSE; + } + + return TRUE; +} + + +/* ============================================================================= + * table_remove + * -- Returns TRUE if successful, else FALSE + * ============================================================================= + */ +bool_t +table_remove (table_t* tablePtr, ulong_t hash, void* dataPtr) +{ + long i = hash % tablePtr->numBucket; + + if (!list_remove(tablePtr->buckets[i], dataPtr)) { + return FALSE; + } + + return TRUE; +} + + +/* ============================================================================= + * table_free + * ============================================================================= + */ +void +table_free (table_t* tablePtr) +{ +#if 0 + /* TODO: fix mixed sequential/parallel allocation */ + long i; + + for (i = 0; i < tablePtr->numBucket; i++) { + list_free(tablePtr->buckets[i]); + } +#endif + + free(tablePtr); +} + + +/* ============================================================================= + * TEST_TABLE + * ============================================================================= + */ +#ifdef TEST_TABLE + + +#include + + +static void +printTable (table_t* tablePtr) +{ + long i; + + for (i = 0; i < tablePtr->numBucket; i++) { + list_iter_t it; + printf("%2i: [", i); + list_iter_reset(&it, tablePtr->buckets[i]); + while (list_iter_hasNext(&it, tablePtr->buckets[i])) { + printf("%li ", *(long*)list_iter_next(&it, 
tablePtr->buckets[i])); + } + puts("]"); + } +} + + +int +main () +{ + table_t* tablePtr; + long hash[] = {3, 1, 4, 1, 5, 9, 2, 6, 8, 7, -1}; + long i; + + bool_t status = memory_init(1, 4, 2); + assert(status); + + puts("Starting..."); + + tablePtr = table_alloc(8, NULL); + + for (i = 0; hash[i] >= 0; i++ ) { + bool_t status = table_insert(tablePtr, + (ulong_t)hash[i], + (void*)&hash[i]) + assert(status); + printTable(tablePtr); + puts(""); + } + + for (i = 0; hash[i] >= 0; i++ ) { + bool_t status = table_remove(tablePtr, + (ulong_t)hash[i], + (void*)&hash[i]) + assert(status); + printTable(tablePtr); + puts(""); + } + + table_free(tablePtr); + + puts("Done."); + + return 0; +} + + +#endif /* TEST_TABLE */ + + +/* ============================================================================= + * + * End of table.c + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/SingleTM/genome/table.h b/Robust/src/Benchmarks/SingleTM/genome/table.h new file mode 100644 index 00000000..a2b4c839 --- /dev/null +++ b/Robust/src/Benchmarks/SingleTM/genome/table.h @@ -0,0 +1,142 @@ +/* ============================================================================= + * + * table.h + * -- Fixed-size hash table + * + * ============================================================================= + * + * Copyright (C) Stanford University, 2006. All Rights Reserved. + * Author: Chi Cao Minh + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. 
+ * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef TABLE_H
+#define TABLE_H 1
+
+
+#include "list.h"
+#include "types.h"
+
+
+/*
+ * Fixed-size hash table: an array of lists, one list per bucket.
+ * The bucket for an entry is selected as (hash % numBucket).
+ */
+typedef struct table {
+    list_t** buckets; /* array of numBucket list pointers */
+    long numBucket;   /* number of buckets; set once by table_alloc */
+} table_t;
+
+
+/* =============================================================================
+ * table_alloc
+ * -- Creates a table with numBucket buckets; compare is passed on to
+ *    list_alloc for every bucket list
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+table_t*
+table_alloc (long numBucket, long (*compare)(const void*, const void*));
+
+
+/* =============================================================================
+ * table_insert
+ * -- Inserts dataPtr into the list of bucket (hash % numBucket)
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+table_insert (table_t* tablePtr, ulong_t hash, void* dataPtr);
+
+
+/* =============================================================================
+ * TMtable_insert
+ * -- Like table_insert, but the list insertion is done with TMLIST_INSERT;
+ *    normally invoked through the TMTABLE_INSERT macro below
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+TMtable_insert (TM_ARGDECL table_t* tablePtr, ulong_t hash, void* dataPtr);
+
+
+/* =============================================================================
+ * table_remove
+ * -- Removes dataPtr from the list of bucket (hash % numBucket)
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+table_remove (table_t* tablePtr, ulong_t hash, void* dataPtr);
+
+
+/* =============================================================================
+ * table_free
+ * -- Releases a table created by table_alloc
+ * -- The per-bucket lists are not freed (that code is disabled in table.c)
+ * =============================================================================
+ */
+void
+table_free (table_t* tablePtr);
+
+
+/* Calls TMtable_insert with TM_ARG supplied as the leading argument */
+#define TMTABLE_INSERT(t, h, d) TMtable_insert(TM_ARG t, h, d)
+
+
+#endif /* TABLE_H */
+
+
+/* =============================================================================
+ *
+ * End of table.h
+ *
+ * =============================================================================
+ */
diff --git a/Robust/src/Benchmarks/SingleTM/genome/table.o b/Robust/src/Benchmarks/SingleTM/genome/table.o
new file mode 100644
index 00000000..269c4b6b
Binary files /dev/null and b/Robust/src/Benchmarks/SingleTM/genome/table.o differ