--- /dev/null
+# ==============================================================================
+#
+# Defines.common.mk
+#
+# ==============================================================================
+
+
+# Extra preprocessor defines added to the compile flags for this benchmark.
+# NOTE(review): their effect is decided by whichever sources test these
+# macros (not visible in this fragment) -- confirm before changing them.
+CFLAGS += -DLIST_NO_DUPLICATES
+CFLAGS += -DCHUNK_STEP1=12
+
+# Program name; presumably consumed by the common makefiles that the
+# per-flavor Makefile.* files include after this fragment.
+PROG := genome
+
+# Benchmark-local sources followed by shared sources under $(LIB).
+# $(LIB) is not defined in this fragment. Every entry ends with a line
+# continuation; the lone "#" line after the list absorbs the last one.
+SRCS += \
+ gene.c \
+ genome.c \
+ segments.c \
+ sequencer.c \
+ table.c \
+ $(LIB)/bitmap.c \
+ $(LIB)/hash.c \
+ $(LIB)/hashtable.c \
+ $(LIB)/pair.c \
+ $(LIB)/random.c \
+ $(LIB)/list.c \
+ $(LIB)/mt19937ar.c \
+ $(LIB)/thread.c \
+ $(LIB)/vector.c \
+#
+# One object file per source: replace the .c suffix of each entry with .o
+OBJS := ${SRCS:.c=.o}
+
+
+# ==============================================================================
+#
+# End of Defines.common.mk
+#
+# ==============================================================================
--- /dev/null
+# ==============================================================================
+#
+# Makefile.seq
+#
+# ==============================================================================
+
+
+include ../common/Defines.common.mk
+include ./Defines.common.mk
+include ../common/Makefile.seq
+
+
+# ==============================================================================
+#
+# End of Makefile.seq
+#
+# ==============================================================================
+
--- /dev/null
+# ==============================================================================
+#
+# Makefile.stm
+#
+# ==============================================================================
+
+
+include ../common/Defines.common.mk
+include ./Defines.common.mk
+include ../common/Makefile.stm
+
+
+# ==============================================================================
+#
+# End of Makefile.stm
+#
+# ==============================================================================
--- /dev/null
+# ==============================================================================
+#
+# Makefile.stm.otm
+#
+# ==============================================================================
+
+
+include ../common/Defines.common.otm.mk
+include ./Defines.common.mk
+include ../common/Makefile.stm.otm
+
+
+# ==============================================================================
+#
+# End of Makefile.stm.otm
+#
+# ==============================================================================
--- /dev/null
+Introduction
+------------
+
+This benchmark implements a gene sequencing program that reconstructs the gene
+sequence from segments of a larger gene.
+
+For example, given the segments TCGG, GCAG, ATCG, CAGC, and GATC, the program
+will try to construct the shortest gene that can be made from them.
+
+If we slide the above segments around so that they overlap, we can get:
+
+           TCGG
+      GCAG
+          ATCG
+    CAGC
+         GATC
+    =============
+    CAGCAGATCGG
+
+
+This gives a final sequence of length 11. Another possible solution is:
+
+      TCGG
+         GCAG
+     ATCG
+          CAGC
+    GATC
+    =============
+    GATCGGCAGC
+
+This solution has length 10. Both are consistent with the segments provided,
+but the second is the optimal solution since it is shorter.
+
+The algorithm used to sequence the gene has three phases:
+
+ 1) Remove duplicate segments by using a hash set
+ 2) Match segments using the Rabin-Karp string search algorithm [3]
+ - Cycles are prevented by tracking starts/ends of matched chains
+ 3) Build sequence
+
+The first two steps make up the bulk of the execution time and are parallelized.
+
+
+Compiling and Running
+---------------------
+
+To build the application, simply run:
+
+ make -f <makefile>
+
+in the source directory. For example, for the sequential flavor, run:
+
+ make -f Makefile.seq
+
+By default, this produces an executable named "genome", which can then be
+run in the following manner:
+
+ ./genome -g <gene_length> \
+ -s <segment_length> \
+ -n <number_of_segments> \
+ -t <number_of_threads>
+
+To produce the data in [1] and [2], the following values were used:
+
+ -g256 -s16 -n16384
+
+For running without a simulator, use the following values (-g and -s match the
+program defaults; the built-in default for -n is 4194304):
+
+ -g16384 -s64 -n16777216
+
+
+Workload Size
+-------------
+
+The size of the workload is determined by the -g, -s, and -n options. The
+gene sequencing example in "Introduction" would correspond to -g10 -s4 -n5.
+In general, the values for these three options should satisfy the following
+relationship: -s << -g << -n. Larger values increase the size of the workload.
+
+
+References
+----------
+
+[1] C. Cao Minh, J. Chung, C. Kozyrakis, and K. Olukotun. STAMP: Stanford
+ Transactional Applications for Multi-processing. In IISWC '08: Proceedings
+ of The IEEE International Symposium on Workload Characterization,
+ September 2008.
+
+[2] C. Cao Minh, M. Trautmann, J. Chung, A. McDonald, N. Bronson, J. Casper,
+ C. Kozyrakis, and K. Olukotun. An Effective Hybrid Transactional Memory
+ System with Strong Isolation Guarantees. In Proceedings of the 34th Annual
+ International Symposium on Computer Architecture, 2007.
+
+[3] R. M. Karp and M. O. Rabin. Efficient randomized pattern-matching
+ algorithms. IBM Journal of Research and Development, 1987.
--- /dev/null
+/* =============================================================================
+ *
+ * gene.c
+ * -- Create random gene
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include "gene.h"
+#include "nucleotide.h"
+#include "random.h"
+#include "tm.h"
+
+
+/* =============================================================================
+ * gene_alloc
+ * -- Allocates a gene descriptor, a NUL-terminated contents buffer of
+ *    length + 1 bytes (left unpopulated; see gene_create) and a start
+ *    bitmap of `length` bits
+ * -- Requires length > 1 (asserted)
+ * -- Returns NULL on failure; anything allocated before the failing step
+ *    is released, so a failed call does not leak
+ * -- Release a successful result with gene_free
+ * =============================================================================
+ */
+gene_t*
+gene_alloc (long length)
+{
+    gene_t* genePtr;
+
+    assert(length > 1);
+
+    genePtr = (gene_t*)malloc(sizeof(gene_t));
+    if (genePtr == NULL) {
+        return NULL;
+    }
+
+    genePtr->contents = (char*)malloc((length + 1) * sizeof(char));
+    if (genePtr->contents == NULL) {
+        free(genePtr); /* do not leak the descriptor */
+        return NULL;
+    }
+    genePtr->contents[length] = '\0';
+    genePtr->length = length;
+
+    genePtr->startBitmapPtr = bitmap_alloc(length);
+    if (genePtr->startBitmapPtr == NULL) {
+        /* unwind both earlier allocations */
+        free(genePtr->contents);
+        free(genePtr);
+        return NULL;
+    }
+
+    return genePtr;
+}
+
+
+/* =============================================================================
+ * gene_create
+ * -- Fills the first genePtr->length bytes of genePtr->contents with
+ *    nucleotides, drawing one value from randomPtr per position
+ * -- Both pointers must be non-NULL (asserted)
+ * =============================================================================
+ */
+void
+gene_create (gene_t* genePtr, random_t* randomPtr)
+{
+    const char alphabet[] = {
+        NUCLEOTIDE_ADENINE,
+        NUCLEOTIDE_CYTOSINE,
+        NUCLEOTIDE_GUANINE,
+        NUCLEOTIDE_THYMINE,
+    };
+    char* buffer;
+    long remaining;
+
+    assert(genePtr != NULL);
+    assert(randomPtr != NULL);
+
+    buffer = genePtr->contents;
+
+    /* Walk the buffer front to back, one random draw per nucleotide */
+    for (remaining = genePtr->length; remaining > 0; remaining--) {
+        *buffer = alphabet[(random_generate(randomPtr)% NUCLEOTIDE_NUM_TYPE)];
+        buffer++;
+    }
+}
+
+
+/* =============================================================================
+ * gene_free
+ * -- Releases the start bitmap, the contents buffer and the descriptor
+ * -- Passing NULL is a no-op, mirroring free()
+ * =============================================================================
+ */
+void
+gene_free (gene_t* genePtr)
+{
+    if (genePtr == NULL) {
+        return; /* nothing to release, e.g. after a failed gene_alloc */
+    }
+
+    bitmap_free(genePtr->startBitmapPtr);
+    free(genePtr->contents);
+    free(genePtr);
+}
+
+
+/* =============================================================================
+ * TEST_GENE
+ * =============================================================================
+ */
+#ifdef TEST_GENE
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+
+/*
+ * Self-test for the gene module (built only when TEST_GENE is defined).
+ * Creates two 10-nucleotide genes from different seeds and one
+ * 9-nucleotide gene sharing the first gene's seed, then checks lengths,
+ * that different seeds give different contents, and that the shorter
+ * gene is a prefix of the first one.
+ */
+int
+main (void)
+{
+    gene_t* gene1Ptr;
+    gene_t* gene2Ptr;
+    gene_t* gene3Ptr;
+    random_t* randomPtr;
+
+    bool_t status = memory_init(1, 4, 2);
+    assert(status);
+
+    puts("Starting...");
+
+    gene1Ptr = gene_alloc(10);
+    gene2Ptr = gene_alloc(10);
+    gene3Ptr = gene_alloc(9);
+    randomPtr = random_alloc();
+
+    /* Fail here rather than crash later on a NULL dereference */
+    assert(gene1Ptr != NULL);
+    assert(gene2Ptr != NULL);
+    assert(gene3Ptr != NULL);
+    assert(randomPtr != NULL);
+
+    random_seed(randomPtr, 0);
+    gene_create(gene1Ptr, randomPtr);
+    random_seed(randomPtr, 1);
+    gene_create(gene2Ptr, randomPtr);
+    random_seed(randomPtr, 0);
+    gene_create(gene3Ptr, randomPtr);
+
+    /* length is a signed long; cast strlen's size_t result to compare */
+    assert(gene1Ptr->length == (long)strlen(gene1Ptr->contents));
+    assert(gene2Ptr->length == (long)strlen(gene2Ptr->contents));
+    assert(gene3Ptr->length == (long)strlen(gene3Ptr->contents));
+
+    assert(gene1Ptr->length == gene2Ptr->length);
+    assert(strcmp(gene1Ptr->contents, gene2Ptr->contents) != 0);
+
+    /* Same seed, one nucleotide shorter: gene3 must be a prefix of gene1 */
+    assert(gene1Ptr->length == (gene3Ptr->length + 1));
+    assert(strcmp(gene1Ptr->contents, gene3Ptr->contents) != 0);
+    assert(strncmp(gene1Ptr->contents,
+                   gene3Ptr->contents,
+                   gene3Ptr->length) == 0);
+
+    gene_free(gene1Ptr);
+    gene_free(gene2Ptr);
+    gene_free(gene3Ptr);
+    random_free(randomPtr);
+
+    puts("All tests passed.");
+
+    return 0;
+}
+
+
+#endif /* TEST_GENE */
+
+
+/* =============================================================================
+ *
+ * End of gene.c
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * gene.h
+ * -- Create random gene
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef GENE_H
+#define GENE_H 1
+
+
+#include "bitmap.h"
+#include "random.h"
+
+
+/*
+ * A gene: a string of nucleotide characters plus a bitmap used while
+ * segments are being created from it.
+ */
+typedef struct gene {
+ long length; /* number of nucleotides in contents, excluding the NUL */
+ char* contents; /* NUL-terminated buffer of length + 1 bytes */
+ bitmap_t* startBitmapPtr; /* used for creating segments */
+} gene_t;
+
+
+/* =============================================================================
+ * gene_alloc
+ * -- Does all memory allocation necessary for gene creation: the
+ *    descriptor, the contents buffer and the start bitmap
+ * -- The contents are not populated; call gene_create for that
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+gene_t*
+gene_alloc (long length);
+
+
+/* =============================================================================
+ * gene_create
+ * -- Populate contents with random gene
+ * -- genePtr must come from gene_alloc; randomPtr supplies the random
+ *    values and is advanced by the call
+ * =============================================================================
+ */
+void
+gene_create (gene_t* genePtr, random_t* randomPtr);
+
+
+/* =============================================================================
+ * gene_free
+ * -- Releases everything allocated by gene_alloc for genePtr
+ * =============================================================================
+ */
+void
+gene_free (gene_t* genePtr);
+
+
+#endif /* GENE_H */
+
+
+/* =============================================================================
+ *
+ * End of gene.h
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * genome.c
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#include <assert.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gene.h"
+#include "random.h"
+#include "segments.h"
+#include "sequencer.h"
+#include "thread.h"
+#include "timer.h"
+#include "tm.h"
+#include "vector.h"
+
+
+/*
+ * Command-line option letters. Each enumerator has the value of its option
+ * character so that it can be used directly as an index into global_params.
+ */
+enum param_types {
+ PARAM_GENE = (unsigned char)'g',
+ PARAM_NUMBER = (unsigned char)'n',
+ PARAM_SEGMENT = (unsigned char)'s',
+ PARAM_THREAD = (unsigned char)'t',
+};
+
+
+/* Values installed by setDefaultParams before the command line is parsed */
+#define PARAM_DEFAULT_GENE (1L << 14)
+#define PARAM_DEFAULT_NUMBER (1L << 22)
+#define PARAM_DEFAULT_SEGMENT (1L << 6)
+#define PARAM_DEFAULT_THREAD (1L)
+
+
+/* Option values, indexed by option character (see enum param_types) */
+long global_params[256]; /* 256 = ascii limit */
+
+
+/* =============================================================================
+ * displayUsage
+ * -- Prints the option summary, including the default values, to stdout
+ * -- Does not return: terminates the process with exit status 1
+ * =============================================================================
+ */
+static void
+displayUsage (const char* appName)
+{
+ printf("Usage: %s [options]\n", appName);
+ puts("\nOptions: (defaults)\n");
+ printf(" g <UINT> Length of [g]ene (%li)\n", PARAM_DEFAULT_GENE);
+ printf(" n <UINT> Min [n]umber of segments (%li)\n", PARAM_DEFAULT_NUMBER);
+ printf(" s <UINT> Length of [s]egment (%li)\n", PARAM_DEFAULT_SEGMENT);
+ printf(" t <UINT> Number of [t]hreads (%li)\n", PARAM_DEFAULT_THREAD);
+ puts("");
+ puts("The actual number of segments created may be greater than -n");
+ puts("in order to completely cover the gene.");
+ exit(1);
+}
+
+
+/* =============================================================================
+ * setDefaultParams
+ * -- Stores the PARAM_DEFAULT_* value of every option in global_params
+ * =============================================================================
+ */
+static void
+setDefaultParams (void)
+{
+    /* option letter -> default value, applied in declaration order */
+    static const struct {
+        unsigned char option;
+        long value;
+    } defaults[] = {
+        { PARAM_GENE,    PARAM_DEFAULT_GENE    },
+        { PARAM_NUMBER,  PARAM_DEFAULT_NUMBER  },
+        { PARAM_SEGMENT, PARAM_DEFAULT_SEGMENT },
+        { PARAM_THREAD,  PARAM_DEFAULT_THREAD  },
+    };
+    size_t k;
+
+    for (k = 0; k < sizeof(defaults) / sizeof(defaults[0]); k++) {
+        global_params[defaults[k].option] = defaults[k].value;
+    }
+}
+
+
+/* =============================================================================
+ * parseArgs
+ * -- Installs the default parameters, then overrides them from the
+ *    -g/-n/-s/-t options found in argv
+ * -- Option values must be non-negative decimal integers with no trailing
+ *    characters; anything else is reported on stderr
+ * -- On any error (bad value, unknown option, missing option argument or
+ *    stray non-option argument) calls displayUsage, which exits
+ * =============================================================================
+ */
+static void
+parseArgs (long argc, char* const argv[])
+{
+    long i;
+    long numError = 0;
+    int opt;
+
+    opterr = 0; /* keep getopt quiet; errors are counted and reported here */
+
+    setDefaultParams();
+
+    while ((opt = getopt((int)argc, argv, "g:n:s:t:")) != -1) {
+        switch (opt) {
+            case 'g':
+            case 'n':
+            case 's':
+            case 't': {
+                char* endPtr;
+                long value = strtol(optarg, &endPtr, 10);
+                /* reject empty input, trailing garbage and negatives */
+                if (endPtr == optarg || *endPtr != '\0' || value < 0) {
+                    fprintf(stderr, "Invalid value for -%c: %s\n",
+                            opt, optarg);
+                    numError++;
+                } else {
+                    global_params[(unsigned char)opt] = value;
+                }
+                break;
+            }
+            case '?':
+            default:
+                numError++;
+                break;
+        }
+    }
+
+    for (i = optind; i < argc; i++) {
+        fprintf(stderr, "Non-option argument: %s\n", argv[i]);
+        numError++;
+    }
+
+    if (numError) {
+        displayUsage(argv[0]);
+    }
+}
+
+
+/* =============================================================================
+ * main
+ * -- Parses the options, creates a random gene and segments of it, runs
+ *    the sequencer (timed), compares the resulting sequence with the gene
+ *    and releases everything
+ * -- MAIN, MAIN_RETURN, GOTO_REAL, GOTO_SIM, SIM_GET_NUM_CPU, TM_*,
+ *    P_MEMORY_* and TIMER_* are not defined in this file; presumably they
+ *    come from tm.h / timer.h -- confirm there before reordering calls
+ * =============================================================================
+ */
+MAIN (argc,argv)
+{
+ TIMER_T start;
+ TIMER_T stop;
+
+ GOTO_REAL();
+
+ /* Initialization */
+ parseArgs(argc, (char** const)argv);
+ SIM_GET_NUM_CPU(global_params[PARAM_THREAD]);
+
+ printf("Creating gene and segments... ");
+ fflush(stdout);
+
+ /* Snapshot the parsed parameters into locals */
+ long geneLength = global_params[PARAM_GENE];
+ long segmentLength = global_params[PARAM_SEGMENT];
+ long minNumSegment = global_params[PARAM_NUMBER];
+ long numThread = global_params[PARAM_THREAD];
+
+ TM_STARTUP(numThread);
+ P_MEMORY_STARTUP(numThread);
+ thread_startup(numThread);
+
+ /* Fixed seed 0 for the generator that builds the gene and segments */
+ random_t* randomPtr = random_alloc();
+ assert(randomPtr != NULL);
+ random_seed(randomPtr, 0);
+
+ gene_t* genePtr = gene_alloc(geneLength);
+ assert( genePtr != NULL);
+ gene_create(genePtr, randomPtr);
+ char* gene = genePtr->contents;
+
+ segments_t* segmentsPtr = segments_alloc(segmentLength, minNumSegment);
+ assert(segmentsPtr != NULL);
+ segments_create(segmentsPtr, genePtr, randomPtr);
+ sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr);
+ assert(sequencerPtr != NULL);
+
+ puts("done.");
+ printf("Gene length = %li\n", genePtr->length);
+ printf("Segment length = %li\n", segmentsPtr->length);
+ printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr));
+ fflush(stdout);
+
+ /* Benchmark */
+ printf("Sequencing gene... ");
+ fflush(stdout);
+ TIMER_READ(start);
+ GOTO_SIM();
+ /*
+  * Only the sequencer run sits between the two TIMER_READ calls. With OTM
+  * defined it runs inside an OpenMP parallel region; otherwise it is handed
+  * to thread_start.
+  */
+#ifdef OTM
+#pragma omp parallel
+ {
+ sequencer_run(sequencerPtr);
+ }
+#else
+ thread_start(sequencer_run, (void*)sequencerPtr);
+#endif
+ GOTO_REAL();
+ TIMER_READ(stop);
+ puts("done.");
+ printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop));
+ fflush(stdout);
+
+ /* Check result */
+ {
+ char* sequence = sequencerPtr->sequence;
+ /* strcmp returns 0 when the sequence reproduces the gene exactly */
+ int result = strcmp(gene, sequence);
+ printf("Sequence matches gene: %s\n", (result ? "no" : "yes"));
+ if (result) {
+ printf("gene = %s\n", gene);
+ printf("sequence = %s\n", sequence);
+ }
+ fflush(stdout);
+ /* A mismatch is only reported; the assert checks length alone */
+ assert(strlen(sequence) >= strlen(gene));
+ }
+
+ /* Clean up */
+ printf("Deallocating memory... ");
+ fflush(stdout);
+ sequencer_free(sequencerPtr);
+ segments_free(segmentsPtr);
+ gene_free(genePtr);
+ random_free(randomPtr);
+ puts("done.");
+ fflush(stdout);
+
+ TM_SHUTDOWN();
+ P_MEMORY_SHUTDOWN();
+
+ GOTO_SIM();
+
+ thread_shutdown();
+
+ MAIN_RETURN(0);
+}
+
+
+
+/* =============================================================================
+ *
+ * End of genome.c
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * genome.c
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#include <assert.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gene.h"
+#include "random.h"
+#include "segments.h"
+#include "sequencer.h"
+#include "thread.h"
+#include "timer.h"
+#include "tm.h"
+#include "vector.h"
+
+
+/*
+ * Command-line option letters. Each enumerator has the value of its option
+ * character so that it can be used directly as an index into global_params.
+ */
+enum param_types {
+ PARAM_GENE = (unsigned char)'g',
+ PARAM_NUMBER = (unsigned char)'n',
+ PARAM_SEGMENT = (unsigned char)'s',
+ PARAM_THREAD = (unsigned char)'t',
+};
+
+
+/* Values installed by setDefaultParams before the command line is parsed */
+#define PARAM_DEFAULT_GENE (1L << 14)
+#define PARAM_DEFAULT_NUMBER (1L << 22)
+#define PARAM_DEFAULT_SEGMENT (1L << 6)
+#define PARAM_DEFAULT_THREAD (1L)
+
+
+/* Option values, indexed by option character (see enum param_types) */
+long global_params[256]; /* 256 = ascii limit */
+
+
+/* =============================================================================
+ * displayUsage
+ * -- Prints the option summary, including the default values, to stdout
+ * -- Does not return: terminates the process with exit status 1
+ * =============================================================================
+ */
+static void
+displayUsage (const char* appName)
+{
+ printf("Usage: %s [options]\n", appName);
+ puts("\nOptions: (defaults)\n");
+ printf(" g <UINT> Length of [g]ene (%li)\n", PARAM_DEFAULT_GENE);
+ printf(" n <UINT> Min [n]umber of segments (%li)\n", PARAM_DEFAULT_NUMBER);
+ printf(" s <UINT> Length of [s]egment (%li)\n", PARAM_DEFAULT_SEGMENT);
+ printf(" t <UINT> Number of [t]hreads (%li)\n", PARAM_DEFAULT_THREAD);
+ puts("");
+ puts("The actual number of segments created may be greater than -n");
+ puts("in order to completely cover the gene.");
+ exit(1);
+}
+
+
+/* =============================================================================
+ * setDefaultParams
+ * -- Stores the PARAM_DEFAULT_* value of every option in global_params
+ * =============================================================================
+ */
+static void
+setDefaultParams (void)
+{
+    /* option letter -> default value, applied in declaration order */
+    static const struct {
+        unsigned char option;
+        long value;
+    } defaults[] = {
+        { PARAM_GENE,    PARAM_DEFAULT_GENE    },
+        { PARAM_NUMBER,  PARAM_DEFAULT_NUMBER  },
+        { PARAM_SEGMENT, PARAM_DEFAULT_SEGMENT },
+        { PARAM_THREAD,  PARAM_DEFAULT_THREAD  },
+    };
+    size_t k;
+
+    for (k = 0; k < sizeof(defaults) / sizeof(defaults[0]); k++) {
+        global_params[defaults[k].option] = defaults[k].value;
+    }
+}
+
+
+/* =============================================================================
+ * parseArgs
+ * -- Installs the default parameters, then overrides them from the
+ *    -g/-n/-s/-t options found in argv
+ * -- Option values must be non-negative decimal integers with no trailing
+ *    characters; anything else is reported on stderr
+ * -- On any error (bad value, unknown option, missing option argument or
+ *    stray non-option argument) calls displayUsage, which exits
+ * =============================================================================
+ */
+static void
+parseArgs (long argc, char* const argv[])
+{
+    long i;
+    long numError = 0;
+    int opt;
+
+    opterr = 0; /* keep getopt quiet; errors are counted and reported here */
+
+    setDefaultParams();
+
+    while ((opt = getopt((int)argc, argv, "g:n:s:t:")) != -1) {
+        switch (opt) {
+            case 'g':
+            case 'n':
+            case 's':
+            case 't': {
+                char* endPtr;
+                long value = strtol(optarg, &endPtr, 10);
+                /* reject empty input, trailing garbage and negatives */
+                if (endPtr == optarg || *endPtr != '\0' || value < 0) {
+                    fprintf(stderr, "Invalid value for -%c: %s\n",
+                            opt, optarg);
+                    numError++;
+                } else {
+                    global_params[(unsigned char)opt] = value;
+                }
+                break;
+            }
+            case '?':
+            default:
+                numError++;
+                break;
+        }
+    }
+
+    for (i = optind; i < argc; i++) {
+        fprintf(stderr, "Non-option argument: %s\n", argv[i]);
+        numError++;
+    }
+
+    if (numError) {
+        displayUsage(argv[0]);
+    }
+}
+
+
+/* =============================================================================
+ * main
+ * -- Parses the options, creates a random gene and segments of it, runs
+ *    the sequencer (timed), compares the resulting sequence with the gene
+ *    and releases everything
+ * -- MAIN, MAIN_RETURN, GOTO_REAL, GOTO_SIM, SIM_GET_NUM_CPU, TM_*,
+ *    P_MEMORY_* and TIMER_* are not defined in this file; presumably they
+ *    come from tm.h / timer.h -- confirm there before reordering calls
+ * =============================================================================
+ */
+MAIN (argc,argv)
+{
+ TIMER_T start;
+ TIMER_T stop;
+
+ GOTO_REAL();
+
+ /* Initialization */
+ parseArgs(argc, (char** const)argv);
+ SIM_GET_NUM_CPU(global_params[PARAM_THREAD]);
+
+ printf("Creating gene and segments... ");
+ fflush(stdout);
+
+ /* Snapshot the parsed parameters into locals */
+ long geneLength = global_params[PARAM_GENE];
+ long segmentLength = global_params[PARAM_SEGMENT];
+ long minNumSegment = global_params[PARAM_NUMBER];
+ long numThread = global_params[PARAM_THREAD];
+
+ TM_STARTUP(numThread);
+ P_MEMORY_STARTUP(numThread);
+ thread_startup(numThread);
+
+ /* Fixed seed 0 for the generator that builds the gene and segments */
+ random_t* randomPtr = random_alloc();
+ assert(randomPtr != NULL);
+ random_seed(randomPtr, 0);
+
+ gene_t* genePtr = gene_alloc(geneLength);
+ assert( genePtr != NULL);
+ gene_create(genePtr, randomPtr);
+ char* gene = genePtr->contents;
+
+ segments_t* segmentsPtr = segments_alloc(segmentLength, minNumSegment);
+ assert(segmentsPtr != NULL);
+ segments_create(segmentsPtr, genePtr, randomPtr);
+ sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr);
+ assert(sequencerPtr != NULL);
+
+ puts("done.");
+ printf("Gene length = %li\n", genePtr->length);
+ printf("Segment length = %li\n", segmentsPtr->length);
+ printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr));
+ fflush(stdout);
+
+ /* Benchmark */
+ printf("Sequencing gene... ");
+ fflush(stdout);
+ TIMER_READ(start);
+ GOTO_SIM();
+ /*
+  * Only the sequencer run sits between the two TIMER_READ calls. With OTM
+  * defined it runs inside an OpenMP parallel region; otherwise it is handed
+  * to thread_start.
+  */
+#ifdef OTM
+#pragma omp parallel
+ {
+ sequencer_run(sequencerPtr);
+ }
+#else
+ thread_start(sequencer_run, (void*)sequencerPtr);
+#endif
+ GOTO_REAL();
+ TIMER_READ(stop);
+ puts("done.");
+ printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop));
+ fflush(stdout);
+
+ /* Check result */
+ {
+ char* sequence = sequencerPtr->sequence;
+ /* strcmp returns 0 when the sequence reproduces the gene exactly */
+ int result = strcmp(gene, sequence);
+ printf("Sequence matches gene: %s\n", (result ? "no" : "yes"));
+ if (result) {
+ printf("gene = %s\n", gene);
+ printf("sequence = %s\n", sequence);
+ }
+ fflush(stdout);
+ /* A mismatch is only reported; the assert checks length alone */
+ assert(strlen(sequence) >= strlen(gene));
+ }
+
+ /* Clean up */
+ printf("Deallocating memory... ");
+ fflush(stdout);
+ sequencer_free(sequencerPtr);
+ segments_free(segmentsPtr);
+ gene_free(genePtr);
+ random_free(randomPtr);
+ puts("done.");
+ fflush(stdout);
+
+ TM_SHUTDOWN();
+ P_MEMORY_SHUTDOWN();
+
+ GOTO_SIM();
+
+ thread_shutdown();
+
+ MAIN_RETURN(0);
+}
+
+
+
+/* =============================================================================
+ *
+ * End of genome.c
+ *
+ * =============================================================================
+ */
--- /dev/null
+/**
+ * Fixed-size bit set backed by an array of 64-bit words (Java port of the C
+ * bitmap_* routines). Bit i is stored in word i / 64 at bit position i % 64.
+ */
+public class Bitmap {
+    public long numBit;   /* number of addressable bits */
+    public long numWord;  /* number of words in 'bits' */
+    public long bits[];   /* backing storage, numWord entries */
+
+    private static final int NUM_BIT_PER_BYTE = 8;
+    /* A Java long is 8 bytes wide. */
+    private static final int NUM_BIT_PER_WORD = 8 * NUM_BIT_PER_BYTE;
+
+
+    /* =============================================================================
+     * bitmap_alloc
+     * -- Creates a bitmap of myNumBit bits, all clear
+     * -- Throws IllegalArgumentException if myNumBit is negative or needs more
+     *    words than a Java array can hold
+     * =============================================================================
+     */
+    Bitmap(long myNumBit) {
+        if (myNumBit < 0) {
+            throw new IllegalArgumentException("negative bitmap size: " + myNumBit);
+        }
+
+        numBit = myNumBit;
+        numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD);
+        if (numWord > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException("bitmap too large: " + myNumBit);
+        }
+
+        /* Java zero-fills new arrays, so every bit starts clear. */
+        bits = new long[(int) numWord];
+    }
+
+    /* =============================================================================
+     * bitmap_copy (copy constructor)
+     * -- The new bitmap gets its own copy of the source's words
+     * =============================================================================
+     */
+    Bitmap(Bitmap myBitMap) {
+        numBit = myBitMap.numBit;
+        numWord = myBitMap.numWord;
+        bits = new long[(int) numWord];
+        System.arraycopy(myBitMap.bits, 0, bits, 0, (int) numWord);
+    }
+
+    /* Index of the word that holds bit i. */
+    private static int wordOf(long i) {
+        return (int) (i / NUM_BIT_PER_WORD);
+    }
+
+    /* Single-bit mask for bit i within its word. The literal must be 1L: with an
+     * int 1, Java masks the shift distance to 5 bits, so bits 32..63 would alias
+     * bits 0..31. */
+    private static long maskOf(long i) {
+        return 1L << (i % NUM_BIT_PER_WORD);
+    }
+
+
+    /* =============================================================================
+     * bitmap_set
+     * -- Sets ith bit to 1
+     * -- Returns true on success, false if i is out of range
+     * =============================================================================
+     */
+    boolean set (long i) {
+        if ((i < 0) || (i >= numBit)) {
+            return false;
+        }
+
+        bits[wordOf(i)] |= maskOf(i);
+
+        return true;
+    }
+
+
+    /* =============================================================================
+     * bitmap_clear
+     * -- Clears ith bit to 0
+     * -- Returns true on success, false if i is out of range
+     * =============================================================================
+     */
+    boolean clear (long i) {
+        if ((i < 0) || (i >= numBit)) {
+            return false;
+        }
+
+        bits[wordOf(i)] &= ~maskOf(i);
+
+        return true;
+    }
+
+
+    /* =============================================================================
+     * bitmap_clearAll
+     * -- Clears all bits to 0
+     * =============================================================================
+     */
+    void clearAll () {
+        for (int w = 0; w < numWord; w++) {
+            bits[w] = 0;
+        }
+    }
+
+
+    /* =============================================================================
+     * bitmap_isSet
+     * -- Returns true if ith bit is set, else false (also false when i is out of
+     *    range)
+     * =============================================================================
+     */
+    boolean isSet (long i) {
+        return (i >= 0) && (i < numBit) && ((bits[wordOf(i)] & maskOf(i)) != 0);
+    }
+
+
+    /* =============================================================================
+     * bitmap_findClear
+     * -- Returns index of first clear bit at or after startIndex
+     * -- If start index is negative, will start from beginning
+     * -- If no clear bit is found, returns -1
+     * =============================================================================
+     */
+    long findClear (long startIndex) {
+        for (long i = MAX(startIndex, 0); i < numBit; i++) {
+            if ((bits[wordOf(i)] & maskOf(i)) == 0) {
+                return i;
+            }
+        }
+
+        return -1;
+    }
+
+
+    /* =============================================================================
+     * bitmap_findSet
+     * -- Returns index of first set bit at or after startIndex
+     * -- If start index is negative, will start from beginning
+     * -- If no set bit is found, returns -1
+     * =============================================================================
+     */
+    long findSet (long startIndex) {
+        for (long i = MAX(startIndex, 0); i < numBit; i++) {
+            if ((bits[wordOf(i)] & maskOf(i)) != 0) {
+                return i;
+            }
+        }
+
+        return -1;
+    }
+
+
+    /* =============================================================================
+     * bitmap_getNumClear
+     * -- Returns the number of clear bits among the first numBit bits
+     * =============================================================================
+     */
+    long getNumClear () {
+        return (numBit - getNumSet());
+    }
+
+
+    /* =============================================================================
+     * bitmap_getNumSet
+     * -- Returns the number of set bits among the first numBit bits
+     * =============================================================================
+     */
+    long getNumSet () {
+        long count = 0;
+
+        /* Counted bit by bit: padding bits past numBit in the last word may be
+         * set (e.g. after toggleAll) and must not be included. */
+        for (long i = 0; i < numBit; i++) {
+            if ((bits[wordOf(i)] & maskOf(i)) != 0) {
+                count++;
+            }
+        }
+
+        return count;
+    }
+
+    /* =============================================================================
+     * bitmap_toggleAll
+     * -- Inverts every word, including padding bits past numBit
+     * =============================================================================
+     */
+    void toggleAll () {
+        for (int w = 0; w < numWord; w++) {
+            bits[w] ^= -1L;
+        }
+    }
+
+    /* Integer division of a by b, rounded up when there is a positive remainder. */
+    long DIVIDE_AND_ROUND_UP(long a, long b) {
+        return (a/b) + (((a % b) > 0) ? (1) : (0));
+    }
+
+    /* Larger of a and b. */
+    long MAX(long a, long b) {
+        return (a > b) ? a : b;
+    }
+}
--- /dev/null
+/**
+ * Fixed-size bit set backed by an array of 64-bit words (Java port of the C
+ * bitmap_* routines). Bit i is stored in word i / 64 at bit position i % 64.
+ */
+public class Bitmap {
+    public long numBit;   /* number of addressable bits */
+    public long numWord;  /* number of words in 'bits' */
+    public long bits[];   /* backing storage, numWord entries */
+
+    private static final int NUM_BIT_PER_BYTE = 8;
+    /* A Java long is 8 bytes wide. */
+    private static final int NUM_BIT_PER_WORD = 8 * NUM_BIT_PER_BYTE;
+
+
+    /* =============================================================================
+     * bitmap_alloc
+     * -- Creates a bitmap of myNumBit bits, all clear
+     * -- Throws IllegalArgumentException if myNumBit is negative or needs more
+     *    words than a Java array can hold
+     * =============================================================================
+     */
+    Bitmap(long myNumBit) {
+        if (myNumBit < 0) {
+            throw new IllegalArgumentException("negative bitmap size: " + myNumBit);
+        }
+
+        numBit = myNumBit;
+        numWord = DIVIDE_AND_ROUND_UP(numBit, NUM_BIT_PER_WORD);
+        if (numWord > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException("bitmap too large: " + myNumBit);
+        }
+
+        /* Java zero-fills new arrays, so every bit starts clear. */
+        bits = new long[(int) numWord];
+    }
+
+    /* =============================================================================
+     * bitmap_copy (copy constructor)
+     * -- The new bitmap gets its own copy of the source's words
+     * =============================================================================
+     */
+    Bitmap(Bitmap myBitMap) {
+        numBit = myBitMap.numBit;
+        numWord = myBitMap.numWord;
+        bits = new long[(int) numWord];
+        System.arraycopy(myBitMap.bits, 0, bits, 0, (int) numWord);
+    }
+
+    /* Index of the word that holds bit i. */
+    private static int wordOf(long i) {
+        return (int) (i / NUM_BIT_PER_WORD);
+    }
+
+    /* Single-bit mask for bit i within its word. The literal must be 1L: with an
+     * int 1, Java masks the shift distance to 5 bits, so bits 32..63 would alias
+     * bits 0..31. */
+    private static long maskOf(long i) {
+        return 1L << (i % NUM_BIT_PER_WORD);
+    }
+
+
+    /* =============================================================================
+     * bitmap_set
+     * -- Sets ith bit to 1
+     * -- Returns true on success, false if i is out of range
+     * =============================================================================
+     */
+    boolean set (long i) {
+        if ((i < 0) || (i >= numBit)) {
+            return false;
+        }
+
+        bits[wordOf(i)] |= maskOf(i);
+
+        return true;
+    }
+
+
+    /* =============================================================================
+     * bitmap_clear
+     * -- Clears ith bit to 0
+     * -- Returns true on success, false if i is out of range
+     * =============================================================================
+     */
+    boolean clear (long i) {
+        if ((i < 0) || (i >= numBit)) {
+            return false;
+        }
+
+        bits[wordOf(i)] &= ~maskOf(i);
+
+        return true;
+    }
+
+
+    /* =============================================================================
+     * bitmap_clearAll
+     * -- Clears all bits to 0
+     * =============================================================================
+     */
+    void clearAll () {
+        for (int w = 0; w < numWord; w++) {
+            bits[w] = 0;
+        }
+    }
+
+
+    /* =============================================================================
+     * bitmap_isSet
+     * -- Returns true if ith bit is set, else false (also false when i is out of
+     *    range)
+     * =============================================================================
+     */
+    boolean isSet (long i) {
+        return (i >= 0) && (i < numBit) && ((bits[wordOf(i)] & maskOf(i)) != 0);
+    }
+
+
+    /* =============================================================================
+     * bitmap_findClear
+     * -- Returns index of first clear bit at or after startIndex
+     * -- If start index is negative, will start from beginning
+     * -- If no clear bit is found, returns -1
+     * =============================================================================
+     */
+    long findClear (long startIndex) {
+        for (long i = MAX(startIndex, 0); i < numBit; i++) {
+            if ((bits[wordOf(i)] & maskOf(i)) == 0) {
+                return i;
+            }
+        }
+
+        return -1;
+    }
+
+
+    /* =============================================================================
+     * bitmap_findSet
+     * -- Returns index of first set bit at or after startIndex
+     * -- If start index is negative, will start from beginning
+     * -- If no set bit is found, returns -1
+     * =============================================================================
+     */
+    long findSet (long startIndex) {
+        for (long i = MAX(startIndex, 0); i < numBit; i++) {
+            if ((bits[wordOf(i)] & maskOf(i)) != 0) {
+                return i;
+            }
+        }
+
+        return -1;
+    }
+
+
+    /* =============================================================================
+     * bitmap_getNumClear
+     * -- Returns the number of clear bits among the first numBit bits
+     * =============================================================================
+     */
+    long getNumClear () {
+        return (numBit - getNumSet());
+    }
+
+
+    /* =============================================================================
+     * bitmap_getNumSet
+     * -- Returns the number of set bits among the first numBit bits
+     * =============================================================================
+     */
+    long getNumSet () {
+        long count = 0;
+
+        /* Counted bit by bit: padding bits past numBit in the last word may be
+         * set (e.g. after toggleAll) and must not be included. */
+        for (long i = 0; i < numBit; i++) {
+            if ((bits[wordOf(i)] & maskOf(i)) != 0) {
+                count++;
+            }
+        }
+
+        return count;
+    }
+
+    /* =============================================================================
+     * bitmap_toggleAll
+     * -- Inverts every word, including padding bits past numBit
+     * =============================================================================
+     */
+    void toggleAll () {
+        for (int w = 0; w < numWord; w++) {
+            bits[w] ^= -1L;
+        }
+    }
+
+    /* Integer division of a by b, rounded up when there is a positive remainder. */
+    long DIVIDE_AND_ROUND_UP(long a, long b) {
+        return (a/b) + (((a % b) > 0) ? (1) : (0));
+    }
+
+    /* Larger of a and b. */
+    long MAX(long a, long b) {
+        return (a > b) ? a : b;
+    }
+}
--- /dev/null
+public class Gene {
+ public long length;
+ public String contents;
+ public Bitmap startBitmapPtr; /* used for creating segments */
+
+ Gene(long myLength) {
+ length = myLength;
+ contents = "";
+ startBitmapPtr = new BitMap(length);
+ }
+
+
+/* =============================================================================
+ * gene_create
+ * -- Populate contents with random gene
+ * =============================================================================
+ */
+ void create (Random randomObj) {
+ long i;
+ char nucleotides[] = {
+ NUCLEOTIDE_ADENINE,
+ NUCLEOTIDE_CYTOSINE,
+ NUCLEOTIDE_GUANINE,
+ NUCLEOTIDE_THYMINE,
+ };
+
+ for (i = 0; i < length; i++) {
+ contents[i] = nucleotides[(random_generate(randomObj)% NUCLEOTIDE_NUM_TYPE)];
+ }
+ }
+}
--- /dev/null
+public class Gene {
+ public long length;
+ public String contents;
+ public Bitmap startBitmapPtr; /* used for creating segments */
+
+ Gene(long myLength) {
+ length = myLength;
+ contents = "";
+ startBitmapPtr = new BitMap(length);
+ }
+
+
+/* =============================================================================
+ * gene_create
+ * -- Populate contents with random gene
+ * =============================================================================
+ */
+ void create (Random randomObj) {
+ long i;
+ char nucleotides[] = {
+ NUCLEOTIDE_ADENINE,
+ NUCLEOTIDE_CYTOSINE,
+ NUCLEOTIDE_GUANINE,
+ NUCLEOTIDE_THYMINE,
+ };
+
+ for (i = 0; i < length; i++) {
+ contents[i] =
+ nucleotides[(random_generate(randomObj)% NUCLEOTIDE_NUM_TYPE)];
+ }
+ }
+}
--- /dev/null
+/*
+"gene.h"
+"random.h"
+"segments.h"
+"sequencer.h"
+"thread.h"
+"timer.h"
+"tm.h"
+"vector.h"
+"bitmap.h"
+
+*/
+
+public class Genome { // Benchmark driver: holds the run parameters and the (partially ported) main routine.
+ long geneLength; // value of -g
+ long segmentLength; // value of -s
+ long minNumSegment; // value of -n
+ long numThread; // value of -t
+
+ Genome(String x[]) { // Builds a Genome by handing the argument array to parseCmdLine.
+ parseCmdLine(x);
+ }
+
+ public static void main(String x[]){ // Entry point; x is the raw command line.
+
+/* TIMER_T start; */
+/* TIMER_T stop; */
+
+/* GOTO_REAL(); */
+
+ /* Initialization */
+/* parseArgs(argc, (char** const)argv); */
+/* SIM_GET_NUM_CPU(global_params[PARAM_THREAD]); */
+
+ System.out.print("Creating gene and segments... ");
+ Genome g = new Genome(x); // NOTE(review): g is never read below; the bare field names used later are instance fields referenced from this static method.
+
+
+/* TM_STARTUP(numThread); */
+/* P_MEMORY_STARTUP(numThread); */
+/* thread_startup(numThread); */
+
+ Random randomPtr = new Random();
+ random_alloc(randomPtr); // NOTE(review): random_alloc and random_seed are instance methods of Random but are called unqualified here.
+ random_seed(randomPtr, 0);
+
+ Gene genePtr = new Gene(geneLength);
+ genePtr.create(randomPtr);
+ String gene = genePtr.contents;
+
+ Segments segmentsPtr = new Segments(segmentLength, minNumSegment);
+ segmentsPtr.create(genePtr, randomPtr);
+ sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr); // NOTE(review): from this line to MAIN_RETURN the body is still C (pointer types, ->, printf, preprocessor directives) and is not valid Java.
+ assert(sequencerPtr != NULL);
+
+ puts("done.");
+ printf("Gene length = %li\n", genePtr->length);
+ printf("Segment length = %li\n", segmentsPtr->length);
+ printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr));
+ fflush(stdout);
+
+ /* Benchmark */
+ printf("Sequencing gene... ");
+ fflush(stdout);
+ TIMER_READ(start); // start/stop declarations are commented out near the top of main
+ GOTO_SIM();
+#ifdef OTM
+#pragma omp parallel
+ {
+ sequencer_run(sequencerPtr);
+ }
+#else
+ thread_start(sequencer_run, (void*)sequencerPtr);
+#endif
+ GOTO_REAL();
+ TIMER_READ(stop);
+ puts("done.");
+ printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop));
+ fflush(stdout);
+
+ /* Check result */
+ {
+ char* sequence = sequencerPtr->sequence;
+ int result = strcmp(gene, sequence); // zero means the reconstructed sequence equals the gene
+ printf("Sequence matches gene: %s\n", (result ? "no" : "yes"));
+ if (result) {
+ printf("gene = %s\n", gene);
+ printf("sequence = %s\n", sequence);
+ }
+ fflush(stdout);
+ assert(strlen(sequence) >= strlen(gene));
+ }
+
+ /* Clean up */
+ printf("Deallocating memory... ");
+ fflush(stdout);
+ sequencer_free(sequencerPtr);
+ segments_free(segmentsPtr);
+ gene_free(genePtr);
+ random_free(randomPtr);
+ puts("done.");
+ fflush(stdout);
+
+ TM_SHUTDOWN();
+ P_MEMORY_SHUTDOWN();
+
+ GOTO_SIM();
+
+ thread_shutdown();
+
+ MAIN_RETURN(0);
+ }
+
+ public static void parseCmdLine(String args[]) { // Reads -g/-s/-n/-t options into the fields above. NOTE(review): declared static yet assigns instance fields.
+
+ int i = 0;
+ String arg;
+ while (i < args.length && args[i].startsWith("-")) { // stops at the first argument that does not begin with '-'
+ arg = args[i++];
+ //check options
+ if(arg.equals("-g")) {
+ if(i < args.length) { // a flag with no following value is silently ignored
+ geneLength = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-s")) {
+ if(i < args.length) {
+ segmentLength = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-n")) {
+ if(i < args.length) {
+ minNumSegment = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-t")) {
+ if(i < args.length) {
+ numThread = new Integer(args[i++]).intValue();
+ }
+ } // any other '-' option is consumed and skipped
+ }
+ }
+}
+
+public enum param_types { // Names for the four command-line parameters; the C character values survive only as comments. NOTE(review): a second public top-level type in the same file as Genome.
+ PARAM_GENE /*= (unsigned char)'g'*/,
+ PARAM_NUMBER /*= (unsigned char)'n'*/,
+ PARAM_SEGMENT /*= (unsigned char)'s'*/,
+ PARAM_THREAD /*= (unsigned char)'t',*/
+}
--- /dev/null
+/*
+"gene.h"
+"random.h"
+"segments.h"
+"sequencer.h"
+"thread.h"
+"timer.h"
+"tm.h"
+"vector.h"
+"bitmap.h"
+
+*/
+
+public class Genome { // Benchmark driver: holds the run parameters and the (partially ported) main routine.
+ long geneLength; // value of -g
+ long segmentLength; // value of -s
+ long minNumSegment; // value of -n
+ long numThread; // value of -t
+
+ Genome(String x[]) { // Builds a Genome by handing the argument array to parseCmdLine.
+ parseCmdLine(x);
+ }
+
+ public static void main(String x[]){ // Entry point; x is the raw command line.
+
+/* TIMER_T start; */
+/* TIMER_T stop; */
+
+/* GOTO_REAL(); */
+
+ /* Initialization */
+/* parseArgs(argc, (char** const)argv); */
+/* SIM_GET_NUM_CPU(global_params[PARAM_THREAD]); */
+
+ System.out.print("Creating gene and segments... ");
+ Genome g = new Genome(x); // NOTE(review): g is never read below; the bare field names used later are instance fields referenced from this static method.
+
+
+/* TM_STARTUP(numThread); */
+/* P_MEMORY_STARTUP(numThread); */
+/* thread_startup(numThread); */
+
+ Random randomPtr = new Random();
+ random_alloc(randomPtr); // NOTE(review): random_alloc and random_seed are instance methods of Random but are called unqualified here.
+ random_seed(randomPtr, 0);
+
+ Gene genePtr = new Gene(geneLength);
+ genePtr.create(randomPtr);
+ String gene = genePtr.contents;
+
+ Segments segmentsPtr = new Segments(segmentLength, minNumSegment);
+ assert(segmentsPtr != NULL); // NOTE(review): from this line to MAIN_RETURN the body is still C (NULL, pointer types, ->, printf, preprocessor directives) and is not valid Java.
+ segments_create(segmentsPtr, genePtr, randomPtr);
+ sequencer_t* sequencerPtr = sequencer_alloc(geneLength, segmentLength, segmentsPtr);
+ assert(sequencerPtr != NULL);
+
+ puts("done.");
+ printf("Gene length = %li\n", genePtr->length);
+ printf("Segment length = %li\n", segmentsPtr->length);
+ printf("Number segments = %li\n", vector_getSize(segmentsPtr->contentsPtr));
+ fflush(stdout);
+
+ /* Benchmark */
+ printf("Sequencing gene... ");
+ fflush(stdout);
+ TIMER_READ(start); // start/stop declarations are commented out near the top of main
+ GOTO_SIM();
+#ifdef OTM
+#pragma omp parallel
+ {
+ sequencer_run(sequencerPtr);
+ }
+#else
+ thread_start(sequencer_run, (void*)sequencerPtr);
+#endif
+ GOTO_REAL();
+ TIMER_READ(stop);
+ puts("done.");
+ printf("Time = %lf\n", TIMER_DIFF_SECONDS(start, stop));
+ fflush(stdout);
+
+ /* Check result */
+ {
+ char* sequence = sequencerPtr->sequence;
+ int result = strcmp(gene, sequence); // zero means the reconstructed sequence equals the gene
+ printf("Sequence matches gene: %s\n", (result ? "no" : "yes"));
+ if (result) {
+ printf("gene = %s\n", gene);
+ printf("sequence = %s\n", sequence);
+ }
+ fflush(stdout);
+ assert(strlen(sequence) >= strlen(gene));
+ }
+
+ /* Clean up */
+ printf("Deallocating memory... ");
+ fflush(stdout);
+ sequencer_free(sequencerPtr);
+ segments_free(segmentsPtr);
+ gene_free(genePtr);
+ random_free(randomPtr);
+ puts("done.");
+ fflush(stdout);
+
+ TM_SHUTDOWN();
+ P_MEMORY_SHUTDOWN();
+
+ GOTO_SIM();
+
+ thread_shutdown();
+
+ MAIN_RETURN(0);
+ }
+
+ public static void parseCmdLine(String args[]) { // Reads -g/-s/-n/-t options into the fields above. NOTE(review): declared static yet assigns instance fields.
+
+ int i = 0;
+ String arg;
+ while (i < args.length && args[i].startsWith("-")) { // stops at the first argument that does not begin with '-'
+ arg = args[i++];
+ //check options
+ if(arg.equals("-g")) {
+ if(i < args.length) { // a flag with no following value is silently ignored
+ geneLength = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-s")) {
+ if(i < args.length) {
+ segmentLength = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-n")) {
+ if(i < args.length) {
+ minNumSegment = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-t")) {
+ if(i < args.length) {
+ numThread = new Integer(args[i++]).intValue();
+ }
+ } // any other '-' option is consumed and skipped
+ }
+ }
+}
+
+public enum param_types { // Names for the four command-line parameters; the C character values survive only as comments. NOTE(review): a second public top-level type in the same file as Genome.
+ PARAM_GENE /*= (unsigned char)'g'*/,
+ PARAM_NUMBER /*= (unsigned char)'n'*/,
+ PARAM_SEGMENT /*= (unsigned char)'s'*/,
+ PARAM_THREAD /*= (unsigned char)'t',*/
+}
--- /dev/null
+public class List { // NOTE(review): unfinished port; the body below is still the C list header (pointers, struct, typedef), not Java.
+
+private class list_node { // one list node: a data pointer plus a link to the next node
+ void* dataPtr;
+ struct list_node* nextPtr;
+}
+
+typedef list_node_t* list_iter_t; // an iterator is a pointer to a node
+
+ list_node_t head; // head is stored by value, not as a pointer
+ long (*compare)(const void*, const void*); /* returns {-1,0,1}, 0 -> equal */
+ long size;
+} list_t; // closes the class brace using the C typedef-struct tail; the prototypes that follow sit outside it
+
+
+/* =============================================================================
+ * list_iter_reset
+ * =============================================================================
+ */
+void
+list_iter_reset (list_iter_t* itPtr, list_t* listPtr);
+
+
+/* =============================================================================
+ * TMlist_iter_reset
+ * =============================================================================
+ */
+TM_CALLABLE
+void
+TMlist_iter_reset (TM_ARGDECL list_iter_t* itPtr, list_t* listPtr);
+
+
+/* =============================================================================
+ * list_iter_hasNext
+ * =============================================================================
+ */
+bool_t
+list_iter_hasNext (list_iter_t* itPtr, list_t* listPtr);
+
+
+/* =============================================================================
+ * TMlist_iter_hasNext
+ * =============================================================================
+ */
+TM_CALLABLE
+bool_t
+TMlist_iter_hasNext (TM_ARGDECL list_iter_t* itPtr, list_t* listPtr);
+
+
+/* =============================================================================
+ * list_iter_next
+ * =============================================================================
+ */
+void*
+list_iter_next (list_iter_t* itPtr, list_t* listPtr);
+
+
+/* =============================================================================
+ * TMlist_iter_next
+ * =============================================================================
+ */
+TM_CALLABLE
+void*
+TMlist_iter_next (TM_ARGDECL list_iter_t* itPtr, list_t* listPtr);
+
+
+/* =============================================================================
+ * list_alloc
+ * -- If NULL passed for 'compare' function, will compare data pointer addresses
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+list_t*
+list_alloc (long (*compare)(const void*, const void*));
+
+
+/* =============================================================================
+ * Plist_alloc
+ * -- If NULL passed for 'compare' function, will compare data pointer addresses
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+list_t*
+Plist_alloc (long (*compare)(const void*, const void*));
+
+
+/* =============================================================================
+ * TMlist_alloc
+ * -- If NULL passed for 'compare' function, will compare data pointer addresses
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+list_t*
+TMlist_alloc (TM_ARGDECL long (*compare)(const void*, const void*));
+
+
+/* =============================================================================
+ * list_free
+ * =============================================================================
+ */
+void
+list_free (list_t* listPtr);
+
+
+/* =============================================================================
+ * Plist_free
+ * -- Takes the list to free; returns nothing (declared void)
+ * =============================================================================
+ */
+void
+Plist_free (list_t* listPtr);
+
+
+/* =============================================================================
+ * TMlist_free
+ * -- Takes the list to free; returns nothing (declared void)
+ * =============================================================================
+ */
+void
+TMlist_free (TM_ARGDECL list_t* listPtr);
+
+
+
+/* =============================================================================
+ * list_isEmpty
+ * -- Return TRUE if list is empty, else FALSE
+ * =============================================================================
+ */
+bool_t
+list_isEmpty (list_t* listPtr);
+
+
+/* =============================================================================
+ * TMlist_isEmpty
+ * -- Return TRUE if list is empty, else FALSE
+ * =============================================================================
+ */
+TM_CALLABLE
+bool_t
+TMlist_isEmpty (TM_ARGDECL list_t* listPtr);
+
+
+/* =============================================================================
+ * list_getSize
+ * -- Returns size of list
+ * =============================================================================
+ */
+long
+list_getSize (list_t* listPtr);
+
+
+/* =============================================================================
+ * TMlist_getSize
+ * -- Returns size of list
+ * =============================================================================
+ */
+TM_CALLABLE
+long
+TMlist_getSize (TM_ARGDECL list_t* listPtr);
+
+
+/* =============================================================================
+ * list_find
+ * -- Returns NULL if not found, else returns pointer to data
+ * =============================================================================
+ */
+void*
+list_find (list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * TMlist_find
+ * -- Returns NULL if not found, else returns pointer to data
+ * =============================================================================
+ */
+TM_CALLABLE
+void*
+TMlist_find (TM_ARGDECL list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * list_insert
+ * -- Return TRUE on success, else FALSE
+ * =============================================================================
+ */
+bool_t
+list_insert (list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * Plist_insert
+ * -- Return TRUE on success, else FALSE
+ * =============================================================================
+ */
+bool_t
+Plist_insert (list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * TMlist_insert
+ * -- Return TRUE on success, else FALSE
+ * =============================================================================
+ */
+TM_CALLABLE
+bool_t
+TMlist_insert (TM_ARGDECL list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * list_remove
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+list_remove (list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * Plist_remove
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+Plist_remove (list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * TMlist_remove
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+TM_CALLABLE
+bool_t
+TMlist_remove (TM_ARGDECL list_t* listPtr, void* dataPtr);
+
+
+/* =============================================================================
+ * list_clear
+ * -- Removes all elements
+ * =============================================================================
+ */
+void
+list_clear (list_t* listPtr);
+
+
+/* =============================================================================
+ * Plist_clear
+ * -- Removes all elements
+ * =============================================================================
+ */
+void
+Plist_clear (list_t* listPtr);
+
+}
--- /dev/null
+public class List { // Empty placeholder: no fields or methods are declared yet.
+
+
+}
--- /dev/null
+public class Random {
+ long[] mt;
+ int mti;
+ long RANDOM_DEFAULT_SEED;
+ /* period parameter */
+ int N;
+ int M;
+ long MATRIX_A;
+ long UPPER_MASK;
+ long LOWER_MASK;
+
+ public Random() {
+ RANDOM_DEFAULT_SEED = 0L;
+ N = 624;
+ M = 397;
+ mt = new long[N];
+ mti = 0;
+ MATRIX_A = 0x9908b0dfL; /* constant vector a */
+ UPPER_MASK = 0x80000000L; /* most significant w-r bits */
+ LOWER_MASK = 0x7fffffffL; /* least significant r bits */
+ }
+
+ public Random random_alloc(Random rand) {
+ init_genrand(rand, rand.RANDOM_DEFAULT_SEED);
+ return rand;
+ }
+
+ /* initializes mt[N] with a seed */
+ public void init_genrand(Random rand, long s) {
+ int mti;
+
+ rand.mt[0]= s & 0xFFFFFFFFL;
+ for (mti=1; mti<rand.N; mti++) {
+ rand.mt[mti] = (1812433253L * (rand.mt[mti-1] ^ (rand.mt[mti-1] >> 30)) + mti);
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+ rand.mt[mti] &= 0xFFFFFFFFL;
+ /* for >32 bit machines */
+ }
+
+ rand.mti = mti;
+ }
+
+ public void random_seed(Random rand, long seed) {
+ init_genrand(rand, seed);
+ }
+
+ public long random_generate(Random rand) {
+ return genrand_int32(rand);
+ }
+
+ //public static long genrand_int32(long[] mt, long mtiPtr) {
+ public long genrand_int32(Random rand) {
+ long y;
+ long[] mag01= new long[2];
+ mag01[0] = 0x0L;
+ mag01[1] = rand.MATRIX_A;
+ int mti = rand.mti;
+
+ /* mag01[x] = x * MATRIX_A for x=0,1 */
+
+ if (mti >= rand.N) { /* generate N words at one time */
+ int kk;
+
+ if (mti == rand.N+1) /* if init_genrand() has not been called, */
+ init_genrand(rand, 5489L); /* a default initial seed is used */
+
+ for (kk=0;kk<rand.N-rand.M;kk++) {
+ y = (rand.mt[kk]&rand.UPPER_MASK)|(rand.mt[kk+1]&LOWER_MASK);
+ rand.mt[kk] = rand.mt[kk+M] ^ (y >> 1) ^ mag01[(int)(y & 0x1L)];
+ }
+ for (;kk<rand.N-1;kk++) {
+ y = (rand.mt[kk]&rand.UPPER_MASK)|(rand.mt[kk+1]&LOWER_MASK);
+ rand.mt[kk] = rand.mt[kk+(M-N)] ^ (y >> 1) ^ mag01[(int)(y & 0x1L)];
+ }
+ y = (rand.mt[N-1]&rand.UPPER_MASK)|(rand.mt[0]&LOWER_MASK);
+ rand.mt[N-1] = rand.mt[M-1] ^ (y >> 1) ^ mag01[(int)(y & 0x1L)];
+
+ mti = 0;
+ }
+
+ y = rand.mt[mti++];
+
+ /* Tempering */
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680L;
+ y ^= (y << 15) & 0xefc60000L;
+ y ^= (y >> 18);
+
+ rand.mti = mti;
+
+ return y;
+ }
+}
--- /dev/null
+/**
+ * The collection of gene segments handed to the sequencer: fixed-length
+ * substrings of a gene whose start positions are recorded in the gene's
+ * start bitmap.
+ */
+public class Segments {
+    public long length;   /* length of every segment */
+    public long minNum;   /* number of randomly placed segments to create */
+    Vector contentsPtr;   /* all segments, in creation order */
+/* private: */
+    String strings[];     /* the minNum randomly placed segments */
+
+    /* NOTE(review): Vector is not declared or imported in this file; its
+     * constructor and add() are used exactly as in the original. */
+    Segments (long myLength, long myMinNum) {
+        minNum = myMinNum;
+        length = myLength;
+
+        contentsPtr = new Vector(minNum);
+        /* Allocated here so create() can store into it. */
+        strings = new String[(int) minNum];
+    }
+
+    /* Returns the segment of 'length' characters that starts at 'start'. */
+    private String segmentAt (String geneString, long start) {
+        return geneString.substring((int) start, (int) (start + length));
+    }
+
+
+    /* =============================================================================
+     * segments_create
+     * -- Populates 'contentsPtr'
+     * -- Marks every chosen start position in the gene's start bitmap
+     * =============================================================================
+     */
+    void create (Gene genePtr, Random randomPtr) {
+        String geneString = genePtr.contents;
+        long geneLength = genePtr.length;
+        Bitmap startBitmapPtr = genePtr.startBitmapPtr;
+        /* Number of positions at which a full-length segment can start. */
+        long numStart = geneLength - length + 1;
+
+        /* Pick some random segments to start */
+        for (int i = 0; i < minNum; i++) {
+            long j = randomPtr.random_generate(randomPtr) % numStart;
+            startBitmapPtr.set(j);
+            strings[i] = segmentAt(geneString, j);
+            contentsPtr.add(strings[i]);
+        }
+
+        /* Make sure segment covers start */
+        if (!startBitmapPtr.isSet(0)) {
+            contentsPtr.add(segmentAt(geneString, 0));
+            startBitmapPtr.set(0);
+        }
+
+        /* Add extra segments to fill holes and ensure overlap */
+        long maxZeroRunLength = length - 1;
+        for (long i = 0; i < numStart; i++) {
+            long i_stop = Math.min(i + maxZeroRunLength, numStart);
+            for ( /* continue */; i < i_stop; i++) {
+                if (startBitmapPtr.isSet(i)) {
+                    break;
+                }
+            }
+            if (i == i_stop) {
+                /* Found big enough hole: start a segment at its last position */
+                i = i - 1;
+                contentsPtr.add(segmentAt(geneString, i));
+                startBitmapPtr.set(i);
+            }
+        }
+    }
+}
--- /dev/null
+/**
+ * The collection of gene segments handed to the sequencer: fixed-length
+ * substrings of a gene whose start positions are recorded in the gene's
+ * start bitmap.
+ */
+public class Segments {
+    public long length;   /* length of every segment */
+    public long minNum;   /* number of randomly placed segments to create */
+    Vector contentsPtr;   /* all segments, in creation order */
+/* private: */
+    String strings[];     /* the minNum randomly placed segments */
+
+    /* NOTE(review): Vector is not declared or imported in this file; its
+     * constructor and add() are used exactly as in the original. */
+    Segments (long myLength, long myMinNum) {
+        minNum = myMinNum;
+        length = myLength;
+
+        contentsPtr = new Vector(minNum);
+        /* Allocated here so create() can store into it. */
+        strings = new String[(int) minNum];
+    }
+
+    /* Returns the segment of 'length' characters that starts at 'start'. */
+    private String segmentAt (String geneString, long start) {
+        return geneString.substring((int) start, (int) (start + length));
+    }
+
+
+    /* =============================================================================
+     * segments_create
+     * -- Populates 'contentsPtr'
+     * -- Marks every chosen start position in the gene's start bitmap
+     * =============================================================================
+     */
+    void create (Gene genePtr, Random randomPtr) {
+        String geneString = genePtr.contents;
+        long geneLength = genePtr.length;
+        Bitmap startBitmapPtr = genePtr.startBitmapPtr;
+        /* Number of positions at which a full-length segment can start. */
+        long numStart = geneLength - length + 1;
+
+        /* Pick some random segments to start */
+        for (int i = 0; i < minNum; i++) {
+            long j = randomPtr.random_generate(randomPtr) % numStart;
+            startBitmapPtr.set(j);
+            strings[i] = segmentAt(geneString, j);
+            contentsPtr.add(strings[i]);
+        }
+
+        /* Make sure segment covers start */
+        if (!startBitmapPtr.isSet(0)) {
+            contentsPtr.add(segmentAt(geneString, 0));
+            startBitmapPtr.set(0);
+        }
+
+        /* Add extra segments to fill holes and ensure overlap */
+        long maxZeroRunLength = length - 1;
+        for (long i = 0; i < numStart; i++) {
+            long i_stop = Math.min(i + maxZeroRunLength, numStart);
+            for ( /* continue */; i < i_stop; i++) {
+                if (startBitmapPtr.isSet(i)) {
+                    break;
+                }
+            }
+            if (i == i_stop) {
+                /* Found big enough hole: start a segment at its last position */
+                i = i - 1;
+                contentsPtr.add(segmentAt(geneString, i));
+                /* The result of set() is captured here so the assertion has a
+                 * value in scope. */
+                boolean status = startBitmapPtr.set(i);
+                assert status;
+            }
+        }
+    }
+}
--- /dev/null
+public class Sequencer {
+
+ public char* sequence;
+
+ public Segments segmentsPtr;
+
+ /* For removing duplicate segments */
+ Hashmap uniqueSegmentsPtr;
+
+ /* For matching segments */
+ endInfoEntry endInfoEntries[];
+ Table startHashToConstructEntryTables[];
+
+ /* For constructing sequence */
+ constructEntry constructEntries[];
+ Table hashToConstructEntryTable;
+
+ /* For deallocation */
+ long segmentLength;
+
+
+ /* =============================================================================
+ * sequencer_alloc
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+ Sequencer (long myGeneLength, long mySegmentLength, Segments mySegmentsPtr) {
+
+ long maxNumUniqueSegment = myGeneLength - mySegmentLength + 1;
+ long i;
+
+ uniqueSegmentsPtr = new Hashmap(myGeneLength);
+
+ /* For finding a matching entry */
+ endInfoEntries = new endInfoEntry[maxNumUniqueSegment];
+ for (i = 0; i < maxNumUniqueSegment; i++) {
+ endInfoEntries[i].isEnd = TRUE;
+ endInfoEntries[i].jumpToNext = 1;
+ }
+ startHashToConstructEntryTables = new Table[mySegmentLength];
+ for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */
+ startHashToConstructEntryTables[i] = new Table(myGeneLength);
+ }
+ segmentLength = mySegmentLength;
+
+ /* For constructing sequence */
+ constructEntries = new ContructEntry[maxNumUniqueSegment];
+
+ for (i= 0; i < maxNumUniqueSegment; i++) {
+ constructEntries[i].isStart = TRUE;
+ constructEntries[i].segment = NULL;
+ constructEntries[i].endHash = 0;
+ constructEntries[i].startPtr = constructEntries[i];
+ constructEntries[i].nextPtr = NULL;
+ constructEntries[i].endPtr = constructEntries[i];
+ constructEntries[i].overlap = 0;
+ constructEntries[i].length = segmentLength;
+ }
+ hashToConstructEntryTable = new Table(geneLength);
+
+ segmentsPtr = mySegmentsPtr;
+ }
+
+
+ /* =============================================================================
+ * sequencer_run
+ * =============================================================================
+ */
+
+ void run () {
+
+ //TM_THREAD_ENTER();
+
+ long threadId = thread_getId();
+
+ //Sequencer sequencerPtr = (sequencer_t*)argPtr;
+
+ Hashmap uniqueSegmentsPtr;
+ endInfoEntry endInfoEntries[];
+ Hashmap startHashToConstructEntryTables[];
+ constructEntry constructEntries[];
+ Hashmap hashToConstructEntryTable;
+
+ Vector segmentsContentsPtr = segmentsPtr.contentsPtr;
+ long numSegment = segmentsContentsPtr.size();
+ long segmentLength = segmentsPtr.length;
+
+ long i;
+ long j;
+ long i_start;
+ long i_stop;
+ long numUniqueSegment;
+ long substringLength;
+ long entryIndex;
+
+ /*
+ * Step 1: Remove duplicate segments
+ */
+#if defined(HTM) || defined(STM)
+ long numThread = thread_getNumThread();
+ {
+ /* Choose disjoint segments [i_start,i_stop) for each thread */
+ long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
+ i_start = threadId * partitionSize;
+ if (threadId == (numThread - 1)) {
+ i_stop = numSegment;
+ } else {
+ i_stop = i_start + partitionSize;
+ }
+ }
+#else /* !(HTM || STM) */
+ i_start = 0;
+ i_stop = numSegment;
+#endif /* !(HTM || STM) */
+ for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
+ TM_BEGIN();
+ {
+ long ii;
+ long ii_stop = MIN(i_stop, (i+CHUNK_STEP1));
+ for (ii = i; ii < ii_stop; ii++) {
+ string segment = segmentsContentsPtr.get(ii);
+ TMHASHTABLE_INSERT(uniqueSegmentsPtr,
+ segment,
+ segment);
+ } /* ii */
+ }
+ TM_END();
+ }
+
+ thread_barrier_wait();
+
+ /*
+ * Step 2a: Iterate over unique segments and compute hashes.
+ *
+ * For the gene "atcg", the hashes for the end would be:
+ *
+ * "t", "tc", and "tcg"
+ *
+ * And for the gene "tcgg", the hashes for the start would be:
+ *
+ * "t", "tc", and "tcg"
+ *
+ * The names are "end" and "start" because if a matching pair is found,
+ * they are the substring of the end part of the pair and the start
+ * part of the pair respectively. In the above example, "tcg" is the
+ * matching substring so:
+ *
+ * (end) (start)
+ * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg")
+ */
+
+ /* uniqueSegmentsPtr is constant now */
+ numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr);
+ entryIndex = 0;
+
+#if defined(HTM) || defined(STM)
+ {
+ /* Choose disjoint segments [i_start,i_stop) for each thread */
+ long num = uniqueSegmentsPtr->numBucket;
+ long partitionSize = (num + numThread/2) / numThread; /* with rounding */
+ i_start = threadId * partitionSize;
+ if (threadId == (numThread - 1)) {
+ i_stop = num;
+ } else {
+ i_stop = i_start + partitionSize;
+ }
+ }
+ {
+ /* Approximate disjoint segments of element allocation in constructEntries */
+ long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+ entryIndex = threadId * partitionSize;
+ }
+#else /* !(HTM || STM) */
+ i_start = 0;
+ i_stop = uniqueSegmentsPtr->numBucket;
+ entryIndex = 0;
+#endif /* !(HTM || STM) */
+
+ for (i = i_start; i < i_stop; i++) {
+
+ list_t* chainPtr = uniqueSegmentsPtr->buckets[i];
+ list_iter_t it;
+ list_iter_reset(&it, chainPtr);
+
+ while (list_iter_hasNext(&it, chainPtr)) {
+
+ char* segment =
+ (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr;
+ constructEntry_t* constructEntryPtr;
+ long j;
+ ulong_t startHash;
+ bool_t status;
+
+ /* Find an empty constructEntries entry */
+ TM_BEGIN();
+ while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) {
+ entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
+ }
+ constructEntryPtr = &constructEntries[entryIndex];
+ TM_SHARED_WRITE_P(constructEntryPtr->segment, segment);
+ TM_END();
+ entryIndex = (entryIndex + 1) % numUniqueSegment;
+
+ /*
+ * Save hashes (sdbm algorithm) of segment substrings
+ *
+ * endHashes will be computed for shorter substrings after matches
+ * have been made (in the next phase of the code). This will reduce
+ * the number of substrings for which hashes need to be computed.
+ *
+ * Since we can compute startHashes incrementally, we go ahead
+ * and compute all of them here.
+ */
+ /* constructEntryPtr is local now */
+ constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]);
+
+ startHash = 0;
+ for (j = 1; j < segmentLength; j++) {
+ startHash = (ulong_t)segment[j-1] +
+ (startHash << 6) + (startHash << 16) - startHash;
+ TM_BEGIN();
+ status = TMTABLE_INSERT(startHashToConstructEntryTables[j],
+ (ulong_t)startHash,
+ (void*)constructEntryPtr );
+ TM_END();
+ assert(status);
+ }
+
+ /*
+ * For looking up construct entries quickly
+ */
+ startHash = (ulong_t)segment[j-1] +
+ (startHash << 6) + (startHash << 16) - startHash;
+ TM_BEGIN();
+ status = TMTABLE_INSERT(hashToConstructEntryTable,
+ (ulong_t)startHash,
+ (void*)constructEntryPtr);
+ TM_END();
+ assert(status);
+ }
+ }
+
+ thread_barrier_wait();
+
+ /*
+ * Step 2b: Match ends to starts by using hash-based string comparison.
+ */
+ for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {
+
+ table_t* startHashToConstructEntryTablePtr =
+ startHashToConstructEntryTables[substringLength];
+ list_t** buckets = startHashToConstructEntryTablePtr->buckets;
+ long numBucket = startHashToConstructEntryTablePtr->numBucket;
+
+ long index_start;
+ long index_stop;
+
+#if defined(HTM) || defined(STM)
+ {
+ /* Choose disjoint segments [index_start,index_stop) for each thread */
+ long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+ index_start = threadId * partitionSize;
+ if (threadId == (numThread - 1)) {
+ index_stop = numUniqueSegment;
+ } else {
+ index_stop = index_start + partitionSize;
+ }
+ }
+#else /* !(HTM || STM) */
+ index_start = 0;
+ index_stop = numUniqueSegment;
+#endif /* !(HTM || STM) */
+
+ /* Iterating over disjoint itervals in the range [0, numUniqueSegment) */
+ for (entryIndex = index_start;
+ entryIndex < index_stop;
+ entryIndex += endInfoEntries[entryIndex].jumpToNext)
+ {
+ if (!endInfoEntries[entryIndex].isEnd) {
+ continue;
+ }
+
+ /* ConstructEntries[entryIndex] is local data */
+ constructEntry_t* endConstructEntryPtr =
+ &constructEntries[entryIndex];
+ char* endSegment = endConstructEntryPtr->segment;
+ ulong_t endHash = endConstructEntryPtr->endHash;
+
+ list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */
+ list_iter_t it;
+ list_iter_reset(&it, chainPtr);
+
+ /* Linked list at chainPtr is constant */
+ while (list_iter_hasNext(&it, chainPtr)) {
+
+ constructEntry_t* startConstructEntryPtr =
+ (constructEntry_t*)list_iter_next(&it, chainPtr);
+ char* startSegment = startConstructEntryPtr->segment;
+ long newLength = 0;
+
+ /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
+ TM_BEGIN();
+
+ /* Check if matches */
+ if (TM_SHARED_READ(startConstructEntryPtr->isStart) &&
+ (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) &&
+ (strncmp(startSegment,
+ &endSegment[segmentLength - substringLength],
+ substringLength) == 0))
+ {
+ TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE);
+
+ constructEntry_t* startConstructEntry_endPtr;
+ constructEntry_t* endConstructEntry_startPtr;
+
+ /* Update endInfo (appended something so no longer end) */
+ TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE);
+
+ /* Update segment chain construct info */
+ startConstructEntry_endPtr =
+ (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr);
+ endConstructEntry_startPtr =
+ (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr);
+
+ assert(startConstructEntry_endPtr);
+ assert(endConstructEntry_startPtr);
+ TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr,
+ endConstructEntry_startPtr);
+ TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr,
+ startConstructEntryPtr);
+ TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr,
+ startConstructEntry_endPtr);
+ TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength);
+ newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) +
+ (long)TM_SHARED_READ(startConstructEntryPtr->length) -
+ substringLength;
+ TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength);
+ } /* if (matched) */
+
+ TM_END();
+
+ if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */
+ break;
+ }
+ } /* iterate over chain */
+
+ } /* for (endIndex < numUniqueSegment) */
+
+ thread_barrier_wait();
+
+ /*
+ * Step 2c: Update jump values and hashes
+ *
+ * endHash entries of all remaining ends are updated to the next
+ * substringLength. Additionally jumpToNext entries are updated such
+ * that they allow to skip non-end entries. Currently this is sequential
+ * because parallelization did not perform better.
+. */
+
+ if (threadId == 0) {
+ if (substringLength > 1) {
+ long index = segmentLength - substringLength + 1;
+ /* initialization if j and i: with i being the next end after j=0 */
+ for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) {
+ /* find first non-null */
+ }
+ /* entry 0 is handled seperately from the loop below */
+ endInfoEntries[0].jumpToNext = i;
+ if (endInfoEntries[0].isEnd) {
+ constructEntry_t* constructEntryPtr = &constructEntries[0];
+ char* segment = constructEntryPtr->segment;
+ constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]);
+ }
+ /* Continue scanning (do not reset i) */
+ for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
+ if (endInfoEntries[i].isEnd) {
+ constructEntry_t* constructEntryPtr = &constructEntries[i];
+ char* segment = constructEntryPtr->segment;
+ constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]);
+ endInfoEntries[j].jumpToNext = MAX(1, (i - j));
+ j = i;
+ }
+ }
+ endInfoEntries[j].jumpToNext = i - j;
+ }
+ }
+
+ thread_barrier_wait();
+
+ } /* for (substringLength > 0) */
+
+
+ thread_barrier_wait();
+
+ /*
+ * Step 3: Build sequence string
+ */
+ if (threadId == 0) {
+
+ long totalLength = 0;
+
+ for (i = 0; i < numUniqueSegment; i++) {
+ constructEntry_t* constructEntryPtr = &constructEntries[i];
+ if (constructEntryPtr->isStart) {
+ totalLength += constructEntryPtr->length;
+ }
+ }
+
+ sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char));
+ char* sequence = sequencerPtr->sequence;
+ assert(sequence);
+
+ char* copyPtr = sequence;
+ long sequenceLength = 0;
+
+ for (i = 0; i < numUniqueSegment; i++) {
+ constructEntry_t* constructEntryPtr = &constructEntries[i];
+ /* If there are several start segments, we append in arbitrary order */
+ if (constructEntryPtr->isStart) {
+ long newSequenceLength = sequenceLength + constructEntryPtr->length;
+ assert( newSequenceLength <= totalLength );
+ copyPtr = sequence + sequenceLength;
+ sequenceLength = newSequenceLength;
+ do {
+ long numChar = segmentLength - constructEntryPtr->overlap;
+ if ((copyPtr + numChar) > (sequence + newSequenceLength)) {
+ TM_PRINT0("ERROR: sequence length != actual length\n");
+ break;
+ }
+ memcpy(copyPtr,
+ constructEntryPtr->segment,
+ (numChar * sizeof(char)));
+ copyPtr += numChar;
+ } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL);
+ assert(copyPtr <= (sequence + sequenceLength));
+ }
+ }
+
+ assert(sequence != NULL);
+ sequence[sequenceLength] = '\0';
+ }
+
+ TM_THREAD_EXIT();
+
+ }
+
+
+ private class endInfoEntry {
+ boolean isEnd;
+ long jumpToNext;
+ }
+
+ private class constructEntry {
+ boolean isStart;
+ String segment;
+ long endHash;
+ constructEntry startPtr;
+ constructEntry nextPtr;
+ constructEntry endPtr;
+ long overlap;
+ long length;
+ }
+
+ private class sequencer_run_arg {
+ Sequencer sequencerPtr;
+ Segments segmentsPtr;
+ long preAllocLength;
+ String returnSequence; /* variable stores return value */
+ }
+ /* =============================================================================
+ * hashString
+ * -- uses sdbm hash function
+ * =============================================================================
+ */
+ static long hashString (String str)
+ {
+ long hash = 0;
+ long c;
+
+ /* Note: Do not change this hashing scheme */
+ while ((c = str++) != '\0') {
+ hash = c + (hash << 6) + (hash << 16) - hash;
+ }
+
+ return (long)hash;
+ }
+
+
+ /* =============================================================================
+ * hashSegment
+ * -- For hashtable
+ * =============================================================================
+ */
+ static long hashSegment (string keyPtr)
+ {
+ return (long)hash_sdbm(keyPtr); /* can be any "good" hash function */
+ }
+
+
+ /* =============================================================================
+ * compareSegment
+ * -- For hashtable
+ * =============================================================================
+ */
+ static long compareSegment (pair_t* a, pair_t* b)
+ {
+ return strcmp((char*)(a->firstPtr), (char*)(b->firstPtr));
+ }
+
+}
--- /dev/null
+/* =============================================================================
+ * Sequencer
+ * -- Rebuilds a sequence from overlapping segments: removes duplicate segments,
+ *    links segments whose end matches another segment's start, then copies the
+ *    resulting chains into 'sequence'
+ *
+ * NOTE(review): run(), hashSegment() and compareSegment() still contain C
+ * constructs (pointers, '->', TM_* macros, list_iter_*, memcpy, P_MALLOC) that
+ * have not been ported yet; only the defects listed at each fix are addressed.
+ * =============================================================================
+ */
+public class Sequencer {
+
+    /* Reconstructed sequence; written by thread 0 in step 3 of run() */
+    public char* sequence;
+
+    public Segments segmentsPtr;
+
+    /* For removing duplicate segments */
+    Hashmap uniqueSegmentsPtr;
+
+    /* For matching segments */
+    endInfoEntry endInfoEntries[];
+    Table startHashToConstructEntryTables[];
+
+    /* For constructing sequence */
+    constructEntry constructEntries[];
+    Table hashToConstructEntryTable;
+
+    /* For deallocation */
+    long segmentLength;
+
+
+    /* =============================================================================
+     * sequencer_alloc
+     * -- Allocates the lookup tables and one endInfoEntry/constructEntry per
+     *    possible unique segment (myGeneLength - mySegmentLength + 1 of them)
+     * =============================================================================
+     */
+    Sequencer (long myGeneLength, long mySegmentLength, Segments mySegmentsPtr) {
+
+        long maxNumUniqueSegment = myGeneLength - mySegmentLength + 1;
+        long i;
+
+        uniqueSegmentsPtr = new Hashmap(myGeneLength);
+
+        /* For finding a matching entry */
+        endInfoEntries = new endInfoEntry[maxNumUniqueSegment];
+        for (i = 0; i < maxNumUniqueSegment; i++) {
+            /* Array slots start out null: allocate before writing fields */
+            endInfoEntries[i] = new endInfoEntry();
+            endInfoEntries[i].isEnd = true;
+            endInfoEntries[i].jumpToNext = 1;
+        }
+        startHashToConstructEntryTables = new Table[mySegmentLength];
+        for (i = 1; i < mySegmentLength; i++) { /* 0 is dummy entry */
+            startHashToConstructEntryTables[i] = new Table(myGeneLength);
+        }
+        segmentLength = mySegmentLength;
+
+        /* For constructing sequence */
+        constructEntries = new constructEntry[maxNumUniqueSegment];
+
+        for (i = 0; i < maxNumUniqueSegment; i++) {
+            /* Array slots start out null: allocate before writing fields */
+            constructEntries[i] = new constructEntry();
+            constructEntries[i].isStart = true;
+            constructEntries[i].segment = null;
+            constructEntries[i].endHash = 0;
+            /* Each entry begins as a chain of one: its own start and end */
+            constructEntries[i].startPtr = constructEntries[i];
+            constructEntries[i].nextPtr = null;
+            constructEntries[i].endPtr = constructEntries[i];
+            constructEntries[i].overlap = 0;
+            constructEntries[i].length = segmentLength;
+        }
+        hashToConstructEntryTable = new Table(myGeneLength);
+
+        segmentsPtr = mySegmentsPtr;
+    }
+
+
+    /* =============================================================================
+     * sequencer_run
+     * -- Step 1 inserts all segments into 'uniqueSegmentsPtr'; step 2 hashes the
+     *    unique segments and links matching ends to starts, longest overlap
+     *    first; step 3 (thread 0 only) copies the chains into 'sequence'
+     * =============================================================================
+     */
+
+    void run () {
+
+        //TM_THREAD_ENTER();
+
+        long threadId = thread_getId();
+
+        //Sequencer sequencerPtr = (sequencer_t*)argPtr;
+
+        /*
+         * uniqueSegmentsPtr, endInfoEntries, startHashToConstructEntryTables,
+         * constructEntries and hashToConstructEntryTable are the fields set up
+         * by the constructor. They must not be redeclared as locals here:
+         * uninitialized locals would hide the populated fields.
+         */
+
+        Vector segmentsContentsPtr = segmentsPtr.contentsPtr;
+        long numSegment = segmentsContentsPtr.size();
+        long segmentLength = segmentsPtr.length;
+
+        long i;
+        long j;
+        long i_start;
+        long i_stop;
+        long numUniqueSegment;
+        long substringLength;
+        long entryIndex;
+
+        /*
+         * Step 1: Remove duplicate segments
+         */
+#if defined(HTM) || defined(STM)
+        long numThread = thread_getNumThread();
+        {
+            /* Choose disjoint segments [i_start,i_stop) for each thread */
+            long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
+            i_start = threadId * partitionSize;
+            if (threadId == (numThread - 1)) {
+                i_stop = numSegment;
+            } else {
+                i_stop = i_start + partitionSize;
+            }
+        }
+#else /* !(HTM || STM) */
+        i_start = 0;
+        i_stop = numSegment;
+#endif /* !(HTM || STM) */
+        for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
+            TM_BEGIN();
+            {
+                long ii;
+                long ii_stop = MIN(i_stop, (i+CHUNK_STEP1));
+                for (ii = i; ii < ii_stop; ii++) {
+                    /* segmentsContentsPtr is a Vector object, so use its accessor */
+                    String segment = (String)segmentsContentsPtr.get(ii);
+                    TMHASHTABLE_INSERT(uniqueSegmentsPtr,
+                                       segment,
+                                       segment);
+                } /* ii */
+            }
+            TM_END();
+        }
+
+        thread_barrier_wait();
+
+        /*
+         * Step 2a: Iterate over unique segments and compute hashes.
+         *
+         * For the gene "atcg", the hashes for the end would be:
+         *
+         *     "t", "tc", and "tcg"
+         *
+         * And for the gene "tcgg", the hashes for the start would be:
+         *
+         *     "t", "tc", and "tcg"
+         *
+         * The names are "end" and "start" because if a matching pair is found,
+         * they are the substring of the end part of the pair and the start
+         * part of the pair respectively. In the above example, "tcg" is the
+         * matching substring so:
+         *
+         *     (end)    (start)
+         *     a[tcg] + [tcg]g  = a[tcg]g    (overlap = "tcg")
+         */
+
+        /* uniqueSegmentsPtr is constant now */
+        numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr);
+        entryIndex = 0;
+
+#if defined(HTM) || defined(STM)
+        {
+            /* Choose disjoint segments [i_start,i_stop) for each thread */
+            long num = uniqueSegmentsPtr->numBucket;
+            long partitionSize = (num + numThread/2) / numThread; /* with rounding */
+            i_start = threadId * partitionSize;
+            if (threadId == (numThread - 1)) {
+                i_stop = num;
+            } else {
+                i_stop = i_start + partitionSize;
+            }
+        }
+        {
+            /* Approximate disjoint segments of element allocation in constructEntries */
+            long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+            entryIndex = threadId * partitionSize;
+        }
+#else /* !(HTM || STM) */
+        i_start = 0;
+        i_stop = uniqueSegmentsPtr->numBucket;
+        entryIndex = 0;
+#endif /* !(HTM || STM) */
+
+        for (i = i_start; i < i_stop; i++) {
+
+            list_t* chainPtr = uniqueSegmentsPtr->buckets[i];
+            list_iter_t it;
+            list_iter_reset(&it, chainPtr);
+
+            while (list_iter_hasNext(&it, chainPtr)) {
+
+                char* segment =
+                    (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr;
+                constructEntry_t* constructEntryPtr;
+                long j;
+                ulong_t startHash;
+                bool_t status;
+
+                /* Find an empty constructEntries entry */
+                TM_BEGIN();
+                while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) {
+                    entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
+                }
+                constructEntryPtr = &constructEntries[entryIndex];
+                TM_SHARED_WRITE_P(constructEntryPtr->segment, segment);
+                TM_END();
+                entryIndex = (entryIndex + 1) % numUniqueSegment;
+
+                /*
+                 * Save hashes (sdbm algorithm) of segment substrings
+                 *
+                 * endHashes will be computed for shorter substrings after matches
+                 * have been made (in the next phase of the code). This will reduce
+                 * the number of substrings for which hashes need to be computed.
+                 *
+                 * Since we can compute startHashes incrementally, we go ahead
+                 * and compute all of them here.
+                 */
+                /* constructEntryPtr is local now */
+                constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]);
+
+                startHash = 0;
+                for (j = 1; j < segmentLength; j++) {
+                    startHash = (ulong_t)segment[j-1] +
+                                (startHash << 6) + (startHash << 16) - startHash;
+                    TM_BEGIN();
+                    status = TMTABLE_INSERT(startHashToConstructEntryTables[j],
+                                            (ulong_t)startHash,
+                                            (void*)constructEntryPtr );
+                    TM_END();
+                    assert(status);
+                }
+
+                /*
+                 * For looking up construct entries quickly
+                 */
+                startHash = (ulong_t)segment[j-1] +
+                            (startHash << 6) + (startHash << 16) - startHash;
+                TM_BEGIN();
+                status = TMTABLE_INSERT(hashToConstructEntryTable,
+                                        (ulong_t)startHash,
+                                        (void*)constructEntryPtr);
+                TM_END();
+                assert(status);
+            }
+        }
+
+        thread_barrier_wait();
+
+        /*
+         * Step 2b: Match ends to starts by using hash-based string comparison.
+         */
+        for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {
+
+            table_t* startHashToConstructEntryTablePtr =
+                startHashToConstructEntryTables[substringLength];
+            list_t** buckets = startHashToConstructEntryTablePtr->buckets;
+            long numBucket = startHashToConstructEntryTablePtr->numBucket;
+
+            long index_start;
+            long index_stop;
+
+#if defined(HTM) || defined(STM)
+            {
+                /* Choose disjoint segments [index_start,index_stop) for each thread */
+                long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+                index_start = threadId * partitionSize;
+                if (threadId == (numThread - 1)) {
+                    index_stop = numUniqueSegment;
+                } else {
+                    index_stop = index_start + partitionSize;
+                }
+            }
+#else /* !(HTM || STM) */
+            index_start = 0;
+            index_stop = numUniqueSegment;
+#endif /* !(HTM || STM) */
+
+            /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */
+            for (entryIndex = index_start;
+                 entryIndex < index_stop;
+                 entryIndex += endInfoEntries[entryIndex].jumpToNext)
+            {
+                if (!endInfoEntries[entryIndex].isEnd) {
+                    continue;
+                }
+
+                /* ConstructEntries[entryIndex] is local data */
+                constructEntry_t* endConstructEntryPtr =
+                    &constructEntries[entryIndex];
+                char* endSegment = endConstructEntryPtr->segment;
+                ulong_t endHash = endConstructEntryPtr->endHash;
+
+                list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */
+                list_iter_t it;
+                list_iter_reset(&it, chainPtr);
+
+                /* Linked list at chainPtr is constant */
+                while (list_iter_hasNext(&it, chainPtr)) {
+
+                    constructEntry_t* startConstructEntryPtr =
+                        (constructEntry_t*)list_iter_next(&it, chainPtr);
+                    char* startSegment = startConstructEntryPtr->segment;
+                    long newLength = 0;
+
+                    /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
+                    TM_BEGIN();
+
+                    /* Check if matches */
+                    if (TM_SHARED_READ(startConstructEntryPtr->isStart) &&
+                        (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) &&
+                        (strncmp(startSegment,
+                                 &endSegment[segmentLength - substringLength],
+                                 substringLength) == 0))
+                    {
+                        TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE);
+
+                        constructEntry_t* startConstructEntry_endPtr;
+                        constructEntry_t* endConstructEntry_startPtr;
+
+                        /* Update endInfo (appended something so no longer end) */
+                        TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE);
+
+                        /* Update segment chain construct info */
+                        startConstructEntry_endPtr =
+                            (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr);
+                        endConstructEntry_startPtr =
+                            (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr);
+
+                        assert(startConstructEntry_endPtr);
+                        assert(endConstructEntry_startPtr);
+                        TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr,
+                                          endConstructEntry_startPtr);
+                        TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr,
+                                         startConstructEntryPtr);
+                        TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr,
+                                          startConstructEntry_endPtr);
+                        TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength);
+                        newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) +
+                                    (long)TM_SHARED_READ(startConstructEntryPtr->length) -
+                                    substringLength;
+                        TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength);
+                    } /* if (matched) */
+
+                    TM_END();
+
+                    if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */
+                        break;
+                    }
+                } /* iterate over chain */
+
+            } /* for (endIndex < numUniqueSegment) */
+
+            thread_barrier_wait();
+
+            /*
+             * Step 2c: Update jump values and hashes
+             *
+             * endHash entries of all remaining ends are updated to the next
+             * substringLength. Additionally jumpToNext entries are updated such
+             * that they allow to skip non-end entries. Currently this is sequential
+             * because parallelization did not perform better.
+             */
+
+            if (threadId == 0) {
+                if (substringLength > 1) {
+                    long index = segmentLength - substringLength + 1;
+                    /* initialization of j and i: with i being the next end after j=0 */
+                    for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) {
+                        /* find first non-null */
+                    }
+                    /* entry 0 is handled separately from the loop below */
+                    endInfoEntries[0].jumpToNext = i;
+                    if (endInfoEntries[0].isEnd) {
+                        constructEntry_t* constructEntryPtr = &constructEntries[0];
+                        char* segment = constructEntryPtr->segment;
+                        constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]);
+                    }
+                    /* Continue scanning (do not reset i) */
+                    for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
+                        if (endInfoEntries[i].isEnd) {
+                            constructEntry_t* constructEntryPtr = &constructEntries[i];
+                            char* segment = constructEntryPtr->segment;
+                            constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]);
+                            endInfoEntries[j].jumpToNext = MAX(1, (i - j));
+                            j = i;
+                        }
+                    }
+                    endInfoEntries[j].jumpToNext = i - j;
+                }
+            }
+
+            thread_barrier_wait();
+
+        } /* for (substringLength > 0) */
+
+
+        thread_barrier_wait();
+
+        /*
+         * Step 3: Build sequence string
+         */
+        if (threadId == 0) {
+
+            long totalLength = 0;
+
+            for (i = 0; i < numUniqueSegment; i++) {
+                constructEntry_t* constructEntryPtr = &constructEntries[i];
+                if (constructEntryPtr->isStart) {
+                    totalLength += constructEntryPtr->length;
+                }
+            }
+
+            /* Store straight into the 'sequence' field: there is no sequencerPtr here */
+            sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char));
+            assert(sequence);
+
+            char* copyPtr = sequence;
+            long sequenceLength = 0;
+
+            for (i = 0; i < numUniqueSegment; i++) {
+                constructEntry_t* constructEntryPtr = &constructEntries[i];
+                /* If there are several start segments, we append in arbitrary order */
+                if (constructEntryPtr->isStart) {
+                    long newSequenceLength = sequenceLength + constructEntryPtr->length;
+                    assert( newSequenceLength <= totalLength );
+                    copyPtr = sequence + sequenceLength;
+                    sequenceLength = newSequenceLength;
+                    do {
+                        /* Only the part not overlapped by the next segment is copied */
+                        long numChar = segmentLength - constructEntryPtr->overlap;
+                        if ((copyPtr + numChar) > (sequence + newSequenceLength)) {
+                            TM_PRINT0("ERROR: sequence length != actual length\n");
+                            break;
+                        }
+                        memcpy(copyPtr,
+                               constructEntryPtr->segment,
+                               (numChar * sizeof(char)));
+                        copyPtr += numChar;
+                    } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL);
+                    assert(copyPtr <= (sequence + sequenceLength));
+                }
+            }
+
+            assert(sequence != NULL);
+            sequence[sequenceLength] = '\0';
+        }
+
+        TM_THREAD_EXIT();
+
+    }
+
+
+    /* Per-segment flag and skip distance used while scanning for chain ends */
+    private class endInfoEntry {
+        boolean isEnd;
+        long jumpToNext;
+    }
+
+    /* Per-segment chain bookkeeping used to link segments and build the sequence */
+    private class constructEntry {
+        boolean isStart;
+        String segment;
+        long endHash;
+        constructEntry startPtr;
+        constructEntry nextPtr;
+        constructEntry endPtr;
+        long overlap;
+        long length;
+    }
+
+    private class sequencer_run_arg {
+        Sequencer sequencerPtr;
+        Segments segmentsPtr;
+        long preAllocLength;
+        String returnSequence; /* variable stores return value */
+    }
+
+    /* =============================================================================
+     * hashString
+     * -- uses sdbm hash function
+     * -- Returns the hash of the characters of 'str'
+     * =============================================================================
+     */
+    static long hashString (String str)
+    {
+        long hash = 0;
+
+        /* Note: Do not change this hashing scheme */
+        for (int k = 0; k < str.length(); k++) {
+            long c = str.charAt(k);
+            if (c == '\0') {
+                break; /* the original loop condition stopped at a NUL */
+            }
+            hash = c + (hash << 6) + (hash << 16) - hash;
+        }
+
+        return hash;
+    }
+
+
+    /* =============================================================================
+     * hashSegment
+     * -- For hashtable
+     * -- NOTE(review): hash_sdbm() is not defined in this class -- confirm source
+     * =============================================================================
+     */
+    static long hashSegment (String keyPtr)
+    {
+        return (long)hash_sdbm(keyPtr); /* can be any "good" hash function */
+    }
+
+
+    /* =============================================================================
+     * compareSegment
+     * -- For hashtable
+     * -- NOTE(review): still written against the C pair_t/strcmp API
+     * =============================================================================
+     */
+    static long compareSegment (pair_t* a, pair_t* b)
+    {
+        return strcmp((char*)(a->firstPtr), (char*)(b->firstPtr));
+    }
+
+}
--- /dev/null
+public class Table {
+
+    /* One list per bucket; the list elements are the inserted 'dataPtr' values */
+    LinkedList buckets[];
+    long numBucket;
+
+
+    /* =============================================================================
+     * Table
+     * -- Creates a table with 'myNumBucket' buckets, each starting out as an
+     *    empty list
+     * =============================================================================
+     */
+    Table (long myNumBucket) {
+
+        long i;
+
+        buckets = new LinkedList[myNumBucket];
+
+        /*
+         * Allocating the array only creates empty (null) slots, so every
+         * bucket needs its own list before table_insert/table_remove use it
+         */
+        for (i = 0; i < myNumBucket; i++) {
+            buckets[i] = new LinkedList();
+        }
+
+        numBucket = myNumBucket;
+
+    }
+
+
+    /* =============================================================================
+     * table_insert
+     * -- Adds 'dataPtr' to the bucket selected by 'hash % numBucket'
+     * -- Returns TRUE if successful, else FALSE (the bucket already holds
+     *    'dataPtr')
+     * -- NOTE(review): a negative 'hash' would select a negative index; confirm
+     *    that callers only pass non-negative hashes
+     * =============================================================================
+     */
+    boolean table_insert (long hash, void* dataPtr) {
+        long i = hash % numBucket;
+
+        if(buckets[i].indexOf(dataPtr) != -1) {
+            return FALSE;
+        }
+
+        buckets[i].add(dataPtr);
+
+        return TRUE;
+    }
+
+    /* =============================================================================
+     * table_remove
+     * -- Removes 'dataPtr' from the bucket selected by 'hash % numBucket'
+     * -- Returns TRUE if successful, else FALSE (nothing was removed)
+     * =============================================================================
+     */
+    boolean table_remove (long hash, void* dataPtr) {
+
+        long i = hash % numBucket;
+
+        if (!buckets[i].remove(dataPtr)) {
+            return FALSE;
+        }
+
+        return TRUE;
+
+    }
+
+}
--- /dev/null
+public class Table {
+
+    /* One list per bucket; the list elements are the inserted 'dataPtr' values */
+    LinkedList buckets[];
+    long numBucket;
+
+
+    /* =============================================================================
+     * Table
+     * -- Creates a table with 'myNumBucket' buckets, each starting out as an
+     *    empty list
+     * =============================================================================
+     */
+    Table (long myNumBucket) {
+
+        long i;
+
+        buckets = new LinkedList[myNumBucket];
+
+        /*
+         * Allocating the array only creates empty (null) slots, so every
+         * bucket needs its own list before table_insert/table_remove use it
+         */
+        for (i = 0; i < myNumBucket; i++) {
+            buckets[i] = new LinkedList();
+        }
+
+        numBucket = myNumBucket;
+
+    }
+
+
+    /* =============================================================================
+     * table_insert
+     * -- Adds 'dataPtr' to the bucket selected by 'hash % numBucket'
+     * -- Returns TRUE if successful, else FALSE (the bucket already holds
+     *    'dataPtr')
+     * -- NOTE(review): a negative 'hash' would select a negative index; confirm
+     *    that callers only pass non-negative hashes
+     * =============================================================================
+     */
+    boolean table_insert (long hash, void* dataPtr) {
+        long i = hash % numBucket;
+
+        if(buckets[i].indexOf(dataPtr) != -1) {
+            return FALSE;
+        }
+
+        buckets[i].add(dataPtr);
+
+        return TRUE;
+    }
+
+    /* =============================================================================
+     * table_remove
+     * -- Removes 'dataPtr' from the bucket selected by 'hash % numBucket'
+     * -- Returns TRUE if successful, else FALSE (nothing was removed)
+     * =============================================================================
+     */
+    boolean table_remove (long hash, void* dataPtr) {
+
+        long i = hash % numBucket;
+
+        /* Operate on this object's own bucket list, as table_insert does */
+        if (!buckets[i].remove(dataPtr)) {
+            return FALSE;
+        }
+
+        return TRUE;
+
+    }
+
+}
--- /dev/null
+/* =============================================================================
+ *
+ * nucleotide.h
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef NUCLEOTIDE_H
+#define NUCLEOTIDE_H 1
+
+
+/*
+ * Codes for the four nucleotide bases. Each base is encoded as its lowercase
+ * character constant. NUCLEOTIDE_NUM_TYPE is not a base: it holds the number
+ * (4) of base kinds listed before it.
+ */
+typedef enum nucleotide_type {
+ NUCLEOTIDE_ADENINE = 'a',
+ NUCLEOTIDE_CYTOSINE = 'c',
+ NUCLEOTIDE_GUANINE = 'g',
+ NUCLEOTIDE_THYMINE = 't',
+ NUCLEOTIDE_NUM_TYPE = 4
+} nucleotide_type_t;
+
+
+#endif /* NUCLEOTIDE_H */
+
+
+/* =============================================================================
+ *
+ * End of nucleotide.h
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * segments.c
+ * -- Create random segments from random gene
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gene.h"
+#include "random.h"
+#include "segments.h"
+#include "utility.h"
+#include "vector.h"
+
+
+/* =============================================================================
+ * segments_alloc
+ * -- Does almost all the memory allocation for random segments
+ * -- 'length' is the number of characters per segment; 'minNum' segment
+ *    buffers of 'length'+1 bytes are carved out of one contiguous block and
+ *    NUL-terminated here
+ * -- The actual number of segments created by 'segments_create' may be larger
+ *    than 'minNum' to ensure the segments overlap and cover the entire gene
+ * -- Returns NULL on failure; everything allocated up to that point is
+ *    released first
+ * =============================================================================
+ */
+segments_t*
+segments_alloc (long length, long minNum)
+{
+    segments_t* segmentsPtr;
+    long i;
+    char* string;
+
+    segmentsPtr = (segments_t*)malloc(sizeof(segments_t));
+    if (segmentsPtr == NULL) {
+        return NULL;
+    }
+
+    /* Preallocate for the min number of segments we will need */
+    segmentsPtr->strings = (char**)malloc(minNum * sizeof(char*));
+    if (segmentsPtr->strings == NULL) {
+        free(segmentsPtr);
+        return NULL;
+    }
+
+    /* One block holds all preallocated segments, each with room for a NUL */
+    string = (char*)malloc(minNum * (length+1) * sizeof(char));
+    if (string == NULL) {
+        free(segmentsPtr->strings);
+        free(segmentsPtr);
+        return NULL;
+    }
+    for (i = 0; i < minNum; i++) {
+        segmentsPtr->strings[i] = &string[i * (length+1)];
+        segmentsPtr->strings[i][length] = '\0';
+    }
+    segmentsPtr->minNum = minNum;
+    segmentsPtr->length = length;
+
+    segmentsPtr->contentsPtr = vector_alloc(minNum);
+    if (segmentsPtr->contentsPtr == NULL) {
+        free(string);
+        free(segmentsPtr->strings);
+        free(segmentsPtr);
+        return NULL;
+    }
+
+    return segmentsPtr;
+}
+
+
+/* =============================================================================
+ * appendSegmentCopy
+ * -- Allocates a NUL-terminated copy of the 'segmentLength' characters of
+ *    'geneString' beginning at index 'start', appends the copy to
+ *    'contentsPtr', and marks 'start' in 'startBitmapPtr'
+ * -- Failures are reported through assert, like the rest of this file
+ * =============================================================================
+ */
+static void
+appendSegmentCopy (vector_t* contentsPtr,
+                   bitmap_t* startBitmapPtr,
+                   char* geneString,
+                   long start,
+                   long segmentLength)
+{
+    bool_t status;
+    char* string = (char*)malloc((segmentLength+1) * sizeof(char));
+
+    assert(string != NULL); /* do not write through a failed allocation */
+    string[segmentLength] = '\0';
+    memcpy(string, &(geneString[start]), segmentLength * sizeof(char));
+    status = vector_pushBack(contentsPtr, (void*)string);
+    assert(status);
+    status = bitmap_set(startBitmapPtr, start);
+    assert(status);
+}
+
+
+/* =============================================================================
+ * segments_create
+ * -- Populates 'contentsPtr'
+ * -- First copies 'minNum' segments from random start positions of the gene
+ *    into the preallocated buffers, then adds individually allocated segments
+ *    so that position 0 is a start and no run of unmarked start positions
+ *    reaches 'length'-1
+ * -- Every chosen start position is marked in the gene's start bitmap
+ * -- Requires 1 < segment length <= gene length
+ * =============================================================================
+ */
+void
+segments_create (segments_t* segmentsPtr, gene_t* genePtr, random_t* randomPtr)
+{
+    vector_t* segmentsContentsPtr;
+    char** strings;
+    long segmentLength;
+    long minNumSegment;
+    char* geneString;
+    long geneLength;
+    bitmap_t* startBitmapPtr;
+    long numStart;
+    long i;
+    long maxZeroRunLength;
+
+    assert(segmentsPtr != NULL);
+    assert(genePtr != NULL);
+    assert(randomPtr != NULL);
+
+    segmentsContentsPtr = segmentsPtr->contentsPtr;
+    strings = segmentsPtr->strings;
+    segmentLength = segmentsPtr->length;
+    minNumSegment = segmentsPtr->minNum;
+
+    geneString = genePtr->contents;
+    geneLength = genePtr->length;
+    startBitmapPtr = genePtr->startBitmapPtr;
+    numStart = geneLength - segmentLength + 1;
+
+    /* numStart is used as a modulus below, so it has to be positive */
+    assert(numStart > 0);
+    /*
+     * With a length of 1 the hole-filling loop below would step back to
+     * index -1, so reject that up front
+     */
+    assert(segmentLength > 1);
+
+    /* Pick some random segments to start */
+    for (i = 0; i < minNumSegment; i++) {
+        long j = (long)(random_generate(randomPtr) % numStart);
+        bool_t status = bitmap_set(startBitmapPtr, j);
+        assert(status);
+        memcpy(strings[i], &(geneString[j]), segmentLength * sizeof(char));
+        status = vector_pushBack(segmentsContentsPtr, (void*)strings[i]);
+        assert(status);
+    }
+
+    /* Make sure segment covers start */
+    i = 0;
+    if (!bitmap_isSet(startBitmapPtr, i)) {
+        appendSegmentCopy(segmentsContentsPtr,
+                          startBitmapPtr,
+                          geneString,
+                          i,
+                          segmentLength);
+    }
+
+    /* Add extra segments to fill holes and ensure overlap */
+    maxZeroRunLength = segmentLength - 1;
+    for (i = 0; i < numStart; i++) {
+        long i_stop = MIN((i+maxZeroRunLength), numStart);
+        for ( /* continue */; i < i_stop; i++) {
+            if (bitmap_isSet(startBitmapPtr, i)) {
+                break;
+            }
+        }
+        if (i == i_stop) {
+            /* Found big enough hole: start a segment at its last position */
+            i = i - 1;
+            appendSegmentCopy(segmentsContentsPtr,
+                              startBitmapPtr,
+                              geneString,
+                              i,
+                              segmentLength);
+        }
+    }
+}
+
+
+/* =============================================================================
+ * segments_free
+ * -- Releases 'segmentsPtr' and everything allocated for it by
+ *    'segments_alloc' and 'segments_create'
+ * -- Assumes 'segments_create' ran at most once and 'contentsPtr' was not
+ *    reordered afterwards: the first 'minNum' entries are then the
+ *    preallocated buffers and every later entry is an individually
+ *    allocated segment
+ * =============================================================================
+ */
+void
+segments_free (segments_t* segmentsPtr)
+{
+    vector_t* contentsPtr = segmentsPtr->contentsPtr;
+    long numSegment = vector_getSize(contentsPtr);
+    long i;
+
+    /* Segments added beyond the preallocated ones each own their buffer */
+    for (i = segmentsPtr->minNum; i < numSegment; i++) {
+        free(vector_at(contentsPtr, i));
+    }
+
+    /* strings[0] is the start of the single block shared by all
+     * preallocated segments; it only exists when minNum > 0 */
+    if (segmentsPtr->minNum > 0) {
+        free(segmentsPtr->strings[0]);
+    }
+
+    vector_free(contentsPtr);
+    free(segmentsPtr->strings);
+    free(segmentsPtr);
+}
+
+
+/* =============================================================================
+ * TEST_SEGMENTS
+ * =============================================================================
+ */
+#ifdef TEST_SEGMENTS
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include "types.h"
+
+
+/*
+ * Builds a random gene of 'geneLength' characters, cuts it into segments of
+ * 'segmentLength' (at least 'minNumSegment' of them) and asserts that every
+ * segment occurs in the gene and that consecutive segment starts are close
+ * enough to overlap. Prints the gene and the segments when 'doPrint' is set.
+ */
+static void
+tester (long geneLength, long segmentLength, long minNumSegment, bool_t doPrint)
+{
+    gene_t* genePtr = gene_alloc(geneLength);
+    segments_t* segmentsPtr = segments_alloc(segmentLength, minNumSegment);
+    random_t* randomPtr = random_alloc();
+    bitmap_t* startBitmapPtr = bitmap_alloc(geneLength);
+    long segmentIndex;
+    long position;
+    long previousStart;
+
+    /* Same seed for both steps, as in the original driver */
+    random_seed(randomPtr, 0);
+    gene_create(genePtr, randomPtr);
+    random_seed(randomPtr, 0);
+    segments_create(segmentsPtr, genePtr, randomPtr);
+
+    assert(segmentsPtr->minNum == minNumSegment);
+    assert(vector_getSize(segmentsPtr->contentsPtr) >= minNumSegment);
+
+    if (doPrint) {
+        printf("Gene = %s\n", genePtr->contents);
+    }
+
+    /* Every segment must be a substring of the gene; record where it matches */
+    segmentIndex = 0;
+    while (segmentIndex < vector_getSize(segmentsPtr->contentsPtr)) {
+        char* segment = (char*)vector_at(segmentsPtr->contentsPtr, segmentIndex);
+        char* matchPtr = strstr(genePtr->contents, segment);
+        long matchOffset;
+
+        assert(matchPtr != NULL);
+        matchOffset = matchPtr - genePtr->contents;
+        bitmap_set(startBitmapPtr, matchOffset);
+        if (doPrint) {
+            printf("Segment %li (@%li) = %s\n",
+                   segmentIndex, matchOffset, segment);
+        }
+        segmentIndex++;
+    }
+
+    /* Walk the recorded starts and check the gap between neighbours */
+    assert(bitmap_isSet(startBitmapPtr, 0));
+    previousStart = 0;
+    position = 0;
+    while (position < geneLength) {
+        if (bitmap_isSet(startBitmapPtr, position)) {
+            assert((position-previousStart-1) < segmentLength);
+            previousStart = position;
+        }
+        position++;
+    }
+
+    gene_free(genePtr);
+    segments_free(segmentsPtr);
+    random_free(randomPtr);
+    bitmap_free(startBitmapPtr);
+}
+
+
+/*
+ * Test driver (only built when TEST_SEGMENTS is defined): runs 'tester' on
+ * four gene/segment configurations and returns 0 when all of them pass.
+ */
+int
+main ()
+{
+    bool_t status = memory_init(1, 4, 2);
+    assert(status);
+
+    puts("Starting...");
+
+    tester(10, 4, 20, TRUE);
+    tester(20, 5, 1, TRUE);
+    tester(100, 10, 1000, FALSE);
+    tester(100, 10, 1, FALSE);
+
+    puts("All tests passed.");
+
+    return 0;
+}
+
+
+#endif /* TEST_SEGMENTS */
+
+
+/* =============================================================================
+ *
+ * End of segments.c
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * segments.h
+ * -- Create random segments from random gene
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef SEGMENTS_H
+#define SEGMENTS_H 1
+
+
+#include "gene.h"
+#include "random.h"
+#include "vector.h"
+
+
+/*
+ * Collection of gene segments built by segments_alloc and segments_create.
+ */
+typedef struct segments {
+ long length; /* characters per segment, not counting the terminating NUL */
+ long minNum; /* number of segment buffers preallocated by segments_alloc */
+ vector_t* contentsPtr; /* segment strings (char*) appended by segments_create */
+/* private: */
+ char** strings; /* pointers to the minNum preallocated segment buffers */
+} segments_t;
+
+
+/* =============================================================================
+ * segments_alloc
+ * -- Does almost all the memory allocation for random segments
+ * -- 'length' is the number of characters in each segment and 'minNum' is the
+ * number of segments to preallocate
+ * -- The actual number of segments created by 'segments_create' may be larger
+ * than 'minNum' to ensure the segments overlap and cover the entire gene
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+segments_t*
+segments_alloc (long length, long minNum);
+
+
+/* =============================================================================
+ * segments_create
+ * -- Populates 'contentsPtr'
+ * -- Copies segments out of the contents of 'genePtr', choosing the initial
+ * start positions with 'randomPtr', and marks each chosen start position in
+ * the gene's start bitmap
+ * =============================================================================
+ */
+void
+segments_create (segments_t* segmentsPtr, gene_t* genePtr, random_t* randomPtr);
+
+
+/* =============================================================================
+ * segments_free
+ * -- Releases 'segmentsPtr' together with its contents vector and its string
+ * pointer table; 'segmentsPtr' must not be used afterwards
+ * =============================================================================
+ */
+void
+segments_free (segments_t* segmentsPtr);
+
+
+#endif /* SEGMENTS_H */
+
+
+/* =============================================================================
+ *
+ * End of segments.h
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * sequencer.c
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * Algorithm overview:
+ *
+ * 1) Remove duplicate segments by using hash-set
+ * 2) Match segments using hash-based comparisons
+ * - Cycles are prevented by tracking starts/ends of matched chains
+ * 3) Build sequence
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#include "tm.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hash.h"
+#include "hashtable.h"
+#include "segments.h"
+#include "sequencer.h"
+#include "table.h"
+#include "thread.h"
+#include "utility.h"
+#include "vector.h"
+#include "types.h"
+
+
+/*
+ * Per-segment bookkeeping used while matching ends to starts (step 2b of
+ * sequencer_run). sequencer_alloc creates one entry per possible unique
+ * segment.
+ */
+struct endInfoEntry {
+ bool_t isEnd; /* starts out TRUE; entries with FALSE are skipped in step 2b */
+ long jumpToNext; /* stride added to the entry index in step 2b; starts out 1 */
+};
+
+/*
+ * Per-segment record used to assemble the sequence. sequencer_alloc creates
+ * one per possible unique segment; a record is unused while 'segment' is NULL.
+ */
+struct constructEntry {
+ bool_t isStart; /* starts out TRUE; set to FALSE once matched as the start side of an overlap */
+ char* segment; /* the segment string, NULL until the slot is claimed */
+ ulong_t endHash; /* hashString of the segment without its first character */
+ struct constructEntry* startPtr; /* initially this entry; presumably the first entry of its chain - TODO confirm */
+ struct constructEntry* nextPtr; /* initially NULL; presumably the following entry in the chain - TODO confirm */
+ struct constructEntry* endPtr; /* initially this entry; presumably the last entry of its chain - TODO confirm */
+ long overlap; /* initially 0 */
+ long length; /* initially the segment length */
+};
+
+
+/* =============================================================================
+ * hashString
+ * -- sdbm string hash of the NUL-terminated string 'str'
+ * -- Each character is widened to long before it is mixed in, and the
+ *    running value is updated as ch + (h << 6) + (h << 16) - h
+ * -- Returns 0 for the empty string
+ * =============================================================================
+ */
+static ulong_t
+hashString (char* str)
+{
+    ulong_t accum = 0;
+    char* cursor;
+
+    /* Note: Do not change this hashing scheme */
+    for (cursor = str; *cursor != '\0'; cursor++) {
+        long ch = *cursor;
+        accum = ch + (accum << 6) + (accum << 16) - accum;
+    }
+
+    return accum;
+}
+
+
+/* =============================================================================
+ * hashSegment
+ * -- For hashtable
+ * -- Treats 'keyPtr' as a C string and returns its hash_sdbm value
+ * -- Passed to hashtable_alloc as the hash callback in sequencer_alloc
+ * =============================================================================
+ */
+static ulong_t
+hashSegment (const void* keyPtr)
+{
+ return (ulong_t)hash_sdbm((char*)keyPtr); /* can be any "good" hash function */
+}
+
+
+/* =============================================================================
+ * compareSegment
+ * -- For hashtable
+ * -- Compares the 'firstPtr' members of pairs 'a' and 'b' as C strings and
+ * returns the strcmp result (zero when the strings are equal)
+ * -- Passed to hashtable_alloc as the comparison callback in sequencer_alloc
+ * =============================================================================
+ */
+static long
+compareSegment (const pair_t* a, const pair_t* b)
+{
+ return strcmp((char*)(a->firstPtr), (char*)(b->firstPtr));
+}
+
+
+/* =============================================================================
+ * sequencer_alloc
+ * -- Allocates a sequencer for a gene of 'geneLength' characters cut into
+ *    segments of 'segmentLength' characters
+ * -- 'segmentsPtr' is stored, not copied
+ * -- Sets up the unique-segment hashtable, one endInfoEntry and one
+ *    constructEntry per possible unique segment
+ *    (geneLength - segmentLength + 1), and one start-hash table for each
+ *    substring length 1..segmentLength-1
+ * -- Returns NULL on failure
+ * -- NOTE: allocations made before a failing step are not released here
+ * =============================================================================
+ */
+sequencer_t*
+sequencer_alloc (long geneLength, long segmentLength, segments_t* segmentsPtr)
+{
+    sequencer_t* sequencerPtr;
+    long maxNumUniqueSegment = geneLength - segmentLength + 1;
+    long i;
+
+    sequencerPtr = (sequencer_t*)malloc(sizeof(sequencer_t));
+    if (sequencerPtr == NULL) {
+        return NULL;
+    }
+
+    sequencerPtr->uniqueSegmentsPtr =
+        hashtable_alloc(geneLength, &hashSegment, &compareSegment, -1, -1);
+    if (sequencerPtr->uniqueSegmentsPtr == NULL) {
+        return NULL;
+    }
+
+    /* For finding a matching entry */
+    sequencerPtr->endInfoEntries =
+        (endInfoEntry_t*)malloc(maxNumUniqueSegment * sizeof(endInfoEntry_t));
+    if (sequencerPtr->endInfoEntries == NULL) {
+        /* must not fall into the initialisation loop with a NULL array */
+        return NULL;
+    }
+    for (i = 0; i < maxNumUniqueSegment; i++) {
+        endInfoEntry_t* endInfoEntryPtr = &sequencerPtr->endInfoEntries[i];
+        endInfoEntryPtr->isEnd = TRUE;
+        endInfoEntryPtr->jumpToNext = 1;
+    }
+    sequencerPtr->startHashToConstructEntryTables =
+        (table_t**)malloc(segmentLength * sizeof(table_t*));
+    if (sequencerPtr->startHashToConstructEntryTables == NULL) {
+        return NULL;
+    }
+    /* 0 is dummy entry; keep it a well-defined NULL rather than garbage */
+    if (segmentLength > 0) {
+        sequencerPtr->startHashToConstructEntryTables[0] = NULL;
+    }
+    for (i = 1; i < segmentLength; i++) {
+        sequencerPtr->startHashToConstructEntryTables[i] =
+            table_alloc(geneLength, NULL);
+        if (sequencerPtr->startHashToConstructEntryTables[i] == NULL) {
+            return NULL;
+        }
+    }
+    sequencerPtr->segmentLength = segmentLength;
+
+    /* For constructing sequence */
+    sequencerPtr->constructEntries =
+        (constructEntry_t*)malloc(maxNumUniqueSegment * sizeof(constructEntry_t));
+    if (sequencerPtr->constructEntries == NULL) {
+        return NULL;
+    }
+    for (i= 0; i < maxNumUniqueSegment; i++) {
+        /* Every entry starts as an unclaimed chain of one: itself */
+        constructEntry_t* constructEntryPtr = &sequencerPtr->constructEntries[i];
+        constructEntryPtr->isStart = TRUE;
+        constructEntryPtr->segment = NULL;
+        constructEntryPtr->endHash = 0;
+        constructEntryPtr->startPtr = constructEntryPtr;
+        constructEntryPtr->nextPtr = NULL;
+        constructEntryPtr->endPtr = constructEntryPtr;
+        constructEntryPtr->overlap = 0;
+        constructEntryPtr->length = segmentLength;
+    }
+    sequencerPtr->hashToConstructEntryTable = table_alloc(geneLength, NULL);
+    if (sequencerPtr->hashToConstructEntryTable == NULL) {
+        return NULL;
+    }
+
+    sequencerPtr->segmentsPtr = segmentsPtr;
+
+    return sequencerPtr;
+}
+
+
+/* =============================================================================
+ * sequencer_run
+ * =============================================================================
+ */
+void
+sequencer_run (void* argPtr)
+{
+ TM_THREAD_ENTER();
+
+ long threadId = thread_getId();
+
+ sequencer_t* sequencerPtr = (sequencer_t*)argPtr;
+
+ hashtable_t* uniqueSegmentsPtr;
+ endInfoEntry_t* endInfoEntries;
+ table_t** startHashToConstructEntryTables;
+ constructEntry_t* constructEntries;
+ table_t* hashToConstructEntryTable;
+
+ uniqueSegmentsPtr = sequencerPtr->uniqueSegmentsPtr;
+ endInfoEntries = sequencerPtr->endInfoEntries;
+ startHashToConstructEntryTables = sequencerPtr->startHashToConstructEntryTables;
+ constructEntries = sequencerPtr->constructEntries;
+ hashToConstructEntryTable = sequencerPtr->hashToConstructEntryTable;
+
+ segments_t* segmentsPtr = sequencerPtr->segmentsPtr;
+ assert(segmentsPtr);
+ vector_t* segmentsContentsPtr = segmentsPtr->contentsPtr;
+ long numSegment = vector_getSize(segmentsContentsPtr);
+ long segmentLength = segmentsPtr->length;
+
+ long i;
+ long j;
+ long i_start;
+ long i_stop;
+ long numUniqueSegment;
+ long substringLength;
+ long entryIndex;
+
+ /*
+ * Step 1: Remove duplicate segments
+ */
+#if defined(HTM) || defined(STM)
+ long numThread = thread_getNumThread();
+ {
+ /* Choose disjoint segments [i_start,i_stop) for each thread */
+ long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
+ i_start = threadId * partitionSize;
+ if (threadId == (numThread - 1)) {
+ i_stop = numSegment;
+ } else {
+ i_stop = i_start + partitionSize;
+ }
+ }
+#else /* !(HTM || STM) */
+ i_start = 0;
+ i_stop = numSegment;
+#endif /* !(HTM || STM) */
+ for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
+ TM_BEGIN();
+ {
+ long ii;
+ long ii_stop = MIN(i_stop, (i+CHUNK_STEP1));
+ for (ii = i; ii < ii_stop; ii++) {
+ void* segment = vector_at(segmentsContentsPtr, ii);
+ TMHASHTABLE_INSERT(uniqueSegmentsPtr,
+ segment,
+ segment);
+ } /* ii */
+ }
+ TM_END();
+ }
+
+ thread_barrier_wait();
+
+ /*
+ * Step 2a: Iterate over unique segments and compute hashes.
+ *
+ * For the gene "atcg", the hashes for the end would be:
+ *
+ * "t", "tc", and "tcg"
+ *
+ * And for the gene "tcgg", the hashes for the start would be:
+ *
+ * "t", "tc", and "tcg"
+ *
+ * The names are "end" and "start" because if a matching pair is found,
+ * they are the substring of the end part of the pair and the start
+ * part of the pair respectively. In the above example, "tcg" is the
+ * matching substring so:
+ *
+ * (end) (start)
+ * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg")
+ */
+
+ /* uniqueSegmentsPtr is constant now */
+ numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr);
+ entryIndex = 0;
+
+#if defined(HTM) || defined(STM)
+ {
+ /* Choose disjoint segments [i_start,i_stop) for each thread */
+ long num = uniqueSegmentsPtr->numBucket;
+ long partitionSize = (num + numThread/2) / numThread; /* with rounding */
+ i_start = threadId * partitionSize;
+ if (threadId == (numThread - 1)) {
+ i_stop = num;
+ } else {
+ i_stop = i_start + partitionSize;
+ }
+ }
+ {
+ /* Approximate disjoint segments of element allocation in constructEntries */
+ long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+ entryIndex = threadId * partitionSize;
+ }
+#else /* !(HTM || STM) */
+ i_start = 0;
+ i_stop = uniqueSegmentsPtr->numBucket;
+ entryIndex = 0;
+#endif /* !(HTM || STM) */
+
+ for (i = i_start; i < i_stop; i++) {
+
+ list_t* chainPtr = uniqueSegmentsPtr->buckets[i];
+ list_iter_t it;
+ list_iter_reset(&it, chainPtr);
+
+ while (list_iter_hasNext(&it, chainPtr)) {
+
+ char* segment =
+ (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr;
+ constructEntry_t* constructEntryPtr;
+ long j;
+ ulong_t startHash;
+ bool_t status;
+
+ /* Find an empty constructEntries entry */
+ TM_BEGIN();
+ while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) {
+ entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
+ }
+ constructEntryPtr = &constructEntries[entryIndex];
+ TM_SHARED_WRITE_P(constructEntryPtr->segment, segment);
+ TM_END();
+ entryIndex = (entryIndex + 1) % numUniqueSegment;
+
+ /*
+ * Save hashes (sdbm algorithm) of segment substrings
+ *
+ * endHashes will be computed for shorter substrings after matches
+ * have been made (in the next phase of the code). This will reduce
+ * the number of substrings for which hashes need to be computed.
+ *
+ * Since we can compute startHashes incrementally, we go ahead
+ * and compute all of them here.
+ */
+ /* constructEntryPtr is local now */
+ constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]);
+
+ startHash = 0;
+ for (j = 1; j < segmentLength; j++) {
+ startHash = (ulong_t)segment[j-1] +
+ (startHash << 6) + (startHash << 16) - startHash;
+ TM_BEGIN();
+ status = TMTABLE_INSERT(startHashToConstructEntryTables[j],
+ (ulong_t)startHash,
+ (void*)constructEntryPtr );
+ TM_END();
+ assert(status);
+ }
+
+ /*
+ * For looking up construct entries quickly
+ */
+ startHash = (ulong_t)segment[j-1] +
+ (startHash << 6) + (startHash << 16) - startHash;
+ TM_BEGIN();
+ status = TMTABLE_INSERT(hashToConstructEntryTable,
+ (ulong_t)startHash,
+ (void*)constructEntryPtr);
+ TM_END();
+ assert(status);
+ }
+ }
+
+ thread_barrier_wait();
+
+ /*
+ * Step 2b: Match ends to starts by using hash-based string comparison.
+ */
+ for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {
+
+ table_t* startHashToConstructEntryTablePtr =
+ startHashToConstructEntryTables[substringLength];
+ list_t** buckets = startHashToConstructEntryTablePtr->buckets;
+ long numBucket = startHashToConstructEntryTablePtr->numBucket;
+
+ long index_start;
+ long index_stop;
+
+#if defined(HTM) || defined(STM)
+ {
+ /* Choose disjoint segments [index_start,index_stop) for each thread */
+ long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
+ index_start = threadId * partitionSize;
+ if (threadId == (numThread - 1)) {
+ index_stop = numUniqueSegment;
+ } else {
+ index_stop = index_start + partitionSize;
+ }
+ }
+#else /* !(HTM || STM) */
+ index_start = 0;
+ index_stop = numUniqueSegment;
+#endif /* !(HTM || STM) */
+
+ /* Iterating over disjoint intervals in the range [0, numUniqueSegment) */
+ for (entryIndex = index_start;
+ entryIndex < index_stop;
+ entryIndex += endInfoEntries[entryIndex].jumpToNext)
+ {
+ if (!endInfoEntries[entryIndex].isEnd) {
+ continue;
+ }
+
+ /* ConstructEntries[entryIndex] is local data */
+ constructEntry_t* endConstructEntryPtr =
+ &constructEntries[entryIndex];
+ char* endSegment = endConstructEntryPtr->segment;
+ ulong_t endHash = endConstructEntryPtr->endHash;
+
+ list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */
+ list_iter_t it;
+ list_iter_reset(&it, chainPtr);
+
+ /* Linked list at chainPtr is constant */
+ while (list_iter_hasNext(&it, chainPtr)) {
+
+ constructEntry_t* startConstructEntryPtr =
+ (constructEntry_t*)list_iter_next(&it, chainPtr);
+ char* startSegment = startConstructEntryPtr->segment;
+ long newLength = 0;
+
+ /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
+ TM_BEGIN();
+
+ /* Check if matches */
+ if (TM_SHARED_READ(startConstructEntryPtr->isStart) &&
+ (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) &&
+ (strncmp(startSegment,
+ &endSegment[segmentLength - substringLength],
+ substringLength) == 0))
+ {
+ TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE);
+
+ constructEntry_t* startConstructEntry_endPtr;
+ constructEntry_t* endConstructEntry_startPtr;
+
+ /* Update endInfo (appended something so no longer end) */
+ TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE);
+
+ /* Update segment chain construct info */
+ startConstructEntry_endPtr =
+ (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr);
+ endConstructEntry_startPtr =
+ (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr);
+
+ assert(startConstructEntry_endPtr);
+ assert(endConstructEntry_startPtr);
+ TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr,
+ endConstructEntry_startPtr);
+ TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr,
+ startConstructEntryPtr);
+ TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr,
+ startConstructEntry_endPtr);
+ TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength);
+ newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) +
+ (long)TM_SHARED_READ(startConstructEntryPtr->length) -
+ substringLength;
+ TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength);
+ } /* if (matched) */
+
+ TM_END();
+
+ if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */
+ break;
+ }
+ } /* iterate over chain */
+
+ } /* for (endIndex < numUniqueSegment) */
+
+ thread_barrier_wait();
+
+ /*
+ * Step 2c: Update jump values and hashes
+ *
+ * endHash entries of all remaining ends are updated to the next
+ * substringLength. Additionally jumpToNext entries are updated such
+ * that they allow to skip non-end entries. Currently this is sequential
+ * because parallelization did not perform better.
+ */
+
+ if (threadId == 0) {
+ if (substringLength > 1) {
+ long index = segmentLength - substringLength + 1;
+ /* initialization of j and i: with i being the next end after j=0 */
+ for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) {
+ /* find first non-null */
+ }
+ /* entry 0 is handled separately from the loop below */
+ endInfoEntries[0].jumpToNext = i;
+ if (endInfoEntries[0].isEnd) {
+ constructEntry_t* constructEntryPtr = &constructEntries[0];
+ char* segment = constructEntryPtr->segment;
+ constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]);
+ }
+ /* Continue scanning (do not reset i) */
+ for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
+ if (endInfoEntries[i].isEnd) {
+ constructEntry_t* constructEntryPtr = &constructEntries[i];
+ char* segment = constructEntryPtr->segment;
+ constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]);
+ endInfoEntries[j].jumpToNext = MAX(1, (i - j));
+ j = i;
+ }
+ }
+ endInfoEntries[j].jumpToNext = i - j;
+ }
+ }
+
+ thread_barrier_wait();
+
+ } /* for (substringLength > 0) */
+
+
+ thread_barrier_wait();
+
+ /*
+ * Step 3: Build sequence string
+ */
+ if (threadId == 0) {
+
+ long totalLength = 0;
+
+ for (i = 0; i < numUniqueSegment; i++) {
+ constructEntry_t* constructEntryPtr = &constructEntries[i];
+ if (constructEntryPtr->isStart) {
+ totalLength += constructEntryPtr->length;
+ }
+ }
+
+ sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char));
+ char* sequence = sequencerPtr->sequence;
+ assert(sequence);
+
+ char* copyPtr = sequence;
+ long sequenceLength = 0;
+
+ for (i = 0; i < numUniqueSegment; i++) {
+ constructEntry_t* constructEntryPtr = &constructEntries[i];
+ /* If there are several start segments, we append in arbitrary order */
+ if (constructEntryPtr->isStart) {
+ long newSequenceLength = sequenceLength + constructEntryPtr->length;
+ assert( newSequenceLength <= totalLength );
+ copyPtr = sequence + sequenceLength;
+ sequenceLength = newSequenceLength;
+ do {
+ long numChar = segmentLength - constructEntryPtr->overlap;
+ if ((copyPtr + numChar) > (sequence + newSequenceLength)) {
+ TM_PRINT0("ERROR: sequence length != actual length\n");
+ break;
+ }
+ memcpy(copyPtr,
+ constructEntryPtr->segment,
+ (numChar * sizeof(char)));
+ copyPtr += numChar;
+ } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL);
+ assert(copyPtr <= (sequence + sequenceLength));
+ }
+ }
+
+ assert(sequence != NULL);
+ sequence[sequenceLength] = '\0';
+ }
+
+ TM_THREAD_EXIT();
+}
+
+
+/* =============================================================================
+ * sequencer_free
+ * =============================================================================
+ */
+void
+sequencer_free (sequencer_t* sequencerPtr)
+{
+    /*
+     * Releases the lookup tables, the entry arrays and finally the sequencer
+     * object itself. The unique-segment hashtable and the assembled sequence
+     * string are deliberately left alone (see the disabled section below).
+     */
+    table_t** startTables = sequencerPtr->startHashToConstructEntryTables;
+    const long numStartTable = sequencerPtr->segmentLength;
+    long tableIndex = 1; /* slot 0 is never released here */
+
+    table_free(sequencerPtr->hashToConstructEntryTable);
+    free(sequencerPtr->constructEntries);
+
+    /* One start-hash table per index in [1, segmentLength) */
+    while (tableIndex < numStartTable) {
+        table_free(startTables[tableIndex]);
+        tableIndex++;
+    }
+    free(startTables);
+
+    free(sequencerPtr->endInfoEntries);
+#if 0
+    /* TODO: fix mixed sequential/parallel allocation */
+    hashtable_free(sequencerPtr->uniqueSegmentsPtr);
+    if (sequencerPtr->sequence != NULL) {
+        free(sequencerPtr->sequence);
+    }
+#endif
+    free(sequencerPtr);
+}
+
+
+/* =============================================================================
+ * TEST_SEQUENCER
+ * =============================================================================
+ */
+#ifdef TEST_SEQUENCER
+
+
+#include <assert.h>
+#include <stdio.h>
+#include "segments.h"
+
+
+/*
+ * Test fixtures. Each geneN is the string the sequencer is expected to
+ * produce, and segmentsN is the NULL-terminated list of fragments passed to
+ * tester() from main().
+ */
+
+/* Case 1 ("Simple test" in main): five distinct 4-character fragments */
+char* gene1 = "gatcggcagc";
+char* segments1[] = {
+ "atcg",
+ "gcag",
+ "tcgg",
+ "cagc",
+ "gatc",
+ NULL
+};
+
+/* Case 2 ("Simple test with aliasing segments" in main): 3-character fragments */
+char* gene2 = "aaagc";
+char* segments2[] = {
+ "aaa",
+ "aag",
+ "agc",
+ NULL
+};
+
+/* Case 3 ("Simple test with non-overlapping segments" in main) */
+char* gene3 = "aaacaaagaaat";
+char* segments3[] = {
+ "aaac",
+ "aaag",
+ "aaat",
+ NULL
+};
+
+/*
+ * Case 4 (first of the "Complex tests" in main): 8-character fragments of a
+ * 20-character gene. The list contains repeated fragments (e.g. "ttggctac").
+ */
+char* gene4 = "ttggctacgtatcgcacggt";
+char* segments4[] = {
+ "cgtatcgc",
+ "tcgcacgg",
+ "gtatcgca",
+ "tatcgcac",
+ "atcgcacg",
+ "ttggctac",
+ "ctacgtat",
+ "acgtatcg",
+ "ctacgtat",
+ "cgtatcgc",
+ "atcgcacg",
+ "ggctacgt",
+ "tacgtatc",
+ "tcgcacgg",
+ "ttggctac",
+ "ggctacgt",
+ "atcgcacg",
+ "tatcgcac",
+ "cgtatcgc",
+ "acgtatcg",
+ "gtatcgca",
+ "gtatcgca",
+ "cgcacggt",
+ "tatcgcac",
+ "ttggctac",
+ "atcgcacg",
+ "acgtatcg",
+ "gtatcgca",
+ "ttggctac",
+ "tggctacg",
+ NULL
+};
+
+/*
+ * Case 5 ("Complex tests" in main): 8-character fragments of a 20-character
+ * gene. The list contains repeated fragments (e.g. "gatcggca").
+ */
+char* gene5 = "gatcggcagctggtacggcg";
+char* segments5[] = {
+ "atcggcag",
+ "gtacggcg",
+ "gatcggca",
+ "cagctggt",
+ "tggtacgg",
+ "gatcggca",
+ "gatcggca",
+ "tcggcagc",
+ "ggtacggc",
+ "tggtacgg",
+ "tcggcagc",
+ "gcagctgg",
+ "gatcggca",
+ "gctggtac",
+ "gatcggca",
+ "ctggtacg",
+ "ggcagctg",
+ "tcggcagc",
+ "gtacggcg",
+ "gcagctgg",
+ "ggcagctg",
+ "tcggcagc",
+ "cagctggt",
+ "tggtacgg",
+ "cagctggt",
+ "gcagctgg",
+ "gctggtac",
+ "cggcagct",
+ "agctggta",
+ "ctggtacg",
+ NULL
+};
+
+/*
+ * Case 6 ("Complex tests" in main): 8-character fragments of a 20-character
+ * gene. The list contains repeated fragments (e.g. "ttggtgag").
+ */
+char* gene6 = "ttggtgagccgtaagactcc";
+char* segments6[] = {
+ "cgtaagac",
+ "taagactc",
+ "gtgagccg",
+ "gagccgta",
+ "gccgtaag",
+ "tgagccgt",
+ "gccgtaag",
+ "cgtaagac",
+ "ttggtgag",
+ "agccgtaa",
+ "gccgtaag",
+ "aagactcc",
+ "ggtgagcc",
+ "ttggtgag",
+ "agccgtaa",
+ "gagccgta",
+ "aagactcc",
+ "ttggtgag",
+ "gtaagact",
+ "ccgtaaga",
+ "ttggtgag",
+ "gagccgta",
+ "ggtgagcc",
+ "gagccgta",
+ "gccgtaag",
+ "aagactcc",
+ "gtaagact",
+ "ccgtaaga",
+ "tgagccgt",
+ "ttggtgag",
+ NULL
+};
+
+/*
+ * Case 7 ("Complex tests" in main).
+ * NOTE(review): the gene string and fragment list appear to be an exact copy
+ * of gene5/segments5 -- confirm whether a distinct data set was intended.
+ */
+char* gene7 = "gatcggcagctggtacggcg";
+char* segments7[] = {
+ "atcggcag",
+ "gtacggcg",
+ "gatcggca",
+ "cagctggt",
+ "tggtacgg",
+ "gatcggca",
+ "gatcggca",
+ "tcggcagc",
+ "ggtacggc",
+ "tggtacgg",
+ "tcggcagc",
+ "gcagctgg",
+ "gatcggca",
+ "gctggtac",
+ "gatcggca",
+ "ctggtacg",
+ "ggcagctg",
+ "tcggcagc",
+ "gtacggcg",
+ "gcagctgg",
+ "ggcagctg",
+ "tcggcagc",
+ "cagctggt",
+ "tggtacgg",
+ "cagctggt",
+ "gcagctgg",
+ "gctggtac",
+ "cggcagct",
+ "agctggta",
+ "ctggtacg",
+ NULL
+};
+
+/*
+ * Case 8 ("Complex tests" in main).
+ * NOTE(review): the gene string and fragment list appear to be an exact copy
+ * of gene6/segments6 -- confirm whether a distinct data set was intended.
+ */
+char* gene8 = "ttggtgagccgtaagactcc";
+char* segments8[] = {
+ "cgtaagac",
+ "taagactc",
+ "gtgagccg",
+ "gagccgta",
+ "gccgtaag",
+ "tgagccgt",
+ "gccgtaag",
+ "cgtaagac",
+ "ttggtgag",
+ "agccgtaa",
+ "gccgtaag",
+ "aagactcc",
+ "ggtgagcc",
+ "ttggtgag",
+ "agccgtaa",
+ "gagccgta",
+ "aagactcc",
+ "ttggtgag",
+ "gtaagact",
+ "ccgtaaga",
+ "ttggtgag",
+ "gagccgta",
+ "ggtgagcc",
+ "gagccgta",
+ "gccgtaag",
+ "aagactcc",
+ "gtaagact",
+ "ccgtaaga",
+ "tgagccgt",
+ "ttggtgag",
+ NULL
+};
+
+
+/*
+ * createSegments
+ * -- Wraps a NULL-terminated array of fragment strings in a segments_t.
+ * -- segments[0] must be non-NULL: its length is recorded as the fragment
+ *    length. The strings themselves are not copied; the vector stores the
+ *    caller's pointers.
+ * -- Allocation failures trip an assert.
+ */
+static segments_t*
+createSegments (char* segments[])
+{
+    long i;
+    segments_t* segmentsPtr;
+
+    /*
+     * Size the allocation from the object being created. sizeof(segments)
+     * would only be the size of the char** parameter, which is too small
+     * for the three fields written below.
+     */
+    segmentsPtr = (segments_t*)malloc(sizeof(*segmentsPtr));
+    assert(segmentsPtr);
+
+    segmentsPtr->length = strlen(segments[0]);
+    segmentsPtr->contentsPtr = vector_alloc(1);
+    assert(segmentsPtr->contentsPtr);
+
+    /* Append every fragment up to (not including) the NULL terminator */
+    for (i = 0; segments[i] != NULL; i++) {
+        bool_t status = vector_pushBack(segmentsPtr->contentsPtr,
+                                        (void*)segments[i]);
+        assert(status);
+    }
+
+    segmentsPtr->minNum = vector_getSize(segmentsPtr->contentsPtr);
+
+    return segmentsPtr;
+}
+
+
+/*
+ * tester
+ * -- Runs the sequencer over one fixture, prints the expected and the
+ *    assembled string, and asserts that they are equal.
+ */
+static void
+tester (char* gene, char* segments[])
+{
+    segments_t* segmentsPtr = createSegments(segments);
+    long geneLength = (long)strlen(gene);
+    sequencer_t* sequencerPtr =
+        sequencer_alloc(geneLength, segmentsPtr->length, segmentsPtr);
+    char* assembled;
+
+    /* sequencer_run takes an untyped argument; hand it the sequencer */
+    sequencer_run((void*)sequencerPtr);
+    assembled = sequencerPtr->sequence;
+
+    printf("gene = %s\n", gene);
+    printf("sequence = %s\n", assembled);
+    assert(strcmp(assembled, gene) == 0);
+
+    sequencer_free(sequencerPtr);
+}
+
+
+/*
+ * Test driver: initialises memory and threading, then runs every fixture
+ * through tester() in a fixed order.
+ */
+int
+main ()
+{
+    /* Fixtures, listed in the order they are executed */
+    struct {
+        char*  gene;
+        char** segments;
+    } testCases[] = {
+        { gene1, segments1 }, /* Simple test */
+        { gene2, segments2 }, /* Simple test with aliasing segments */
+        { gene3, segments3 }, /* Simple test with non-overlapping segments */
+        { gene4, segments4 }, /* Complex tests */
+        { gene5, segments5 },
+        { gene6, segments6 },
+        { gene7, segments7 },
+        { gene8, segments8 }
+    };
+    const long numCase = (long)(sizeof(testCases) / sizeof(testCases[0]));
+    long caseIndex;
+    bool_t status;
+
+    status = memory_init(1, 4, 2);
+    assert(status);
+    thread_startup(1);
+
+    puts("Starting...");
+
+    for (caseIndex = 0; caseIndex < numCase; caseIndex++) {
+        tester(testCases[caseIndex].gene, testCases[caseIndex].segments);
+    }
+
+    puts("Passed all tests.");
+
+    return 0;
+}
+
+
+#endif /* TEST_SEQUENCER */
+
+
+/* =============================================================================
+ *
+ * End of sequencer.c
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * sequencer.h
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef SEQUENCER_H
+#define SEQUENCER_H 1
+
+
+#include "hashtable.h"
+#include "segments.h"
+#include "table.h"
+#include "tm.h"
+
+
+/*
+ * Forward declarations only: this header never dereferences these types, it
+ * just stores pointers to them. The struct bodies are presumably defined in
+ * sequencer.c -- TODO confirm.
+ */
+typedef struct endInfoEntry endInfoEntry_t;
+typedef struct constructEntry constructEntry_t;
+
+
+/*
+ * State of one sequencing run: the input segments, the lookup structures
+ * used while matching them, and the resulting sequence string.
+ */
+typedef struct sequencer {
+
+/* public: */
+
+ /* Assembled result; presumably set by sequencer_run -- TODO confirm */
+ char* sequence;
+
+/* private: */
+
+ /* Input segments handed to sequencer_alloc (assumed; verify in sequencer.c) */
+ segments_t* segmentsPtr;
+
+ /* For removing duplicate segments */
+ hashtable_t* uniqueSegmentsPtr;
+
+ /* For matching segments */
+ endInfoEntry_t* endInfoEntries;
+ /* Array of table pointers, one table per array slot */
+ table_t** startHashToConstructEntryTables;
+
+ /* For constructing sequence */
+ constructEntry_t* constructEntries;
+ table_t* hashToConstructEntryTable;
+
+ /* For deallocation */
+ /* NOTE(review): looks like the bound used when freeing the table array above -- confirm */
+ long segmentLength;
+
+} sequencer_t;
+
+
+/*
+ * Argument bundle for a sequencer run.
+ * NOTE(review): sequencer_run is declared below as taking a plain void*;
+ * confirm whether callers still pass this struct or a sequencer_t* directly.
+ */
+typedef struct sequencer_run_arg {
+ sequencer_t* sequencerPtr;
+ segments_t* segmentsPtr;
+ long preAllocLength;
+ char* returnSequence; /* variable stores return value */
+} sequencer_run_arg_t;
+
+
+/* =============================================================================
+ * sequencer_alloc
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+sequencer_t*
+sequencer_alloc (long geneLength, long segmentLength, segments_t* segmentsPtr);
+
+
+/* =============================================================================
+ * sequencer_run
+ * =============================================================================
+ */
+
+void
+sequencer_run (void* argPtr);
+
+
+/* =============================================================================
+ * sequencer_free
+ * =============================================================================
+ */
+void
+sequencer_free (sequencer_t* sequencerPtr);
+
+
+#endif /* SEQUENCER_H */
+
+
+/* =============================================================================
+ *
+ * End of sequencer.h
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * sequencer.h
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef SEQUENCER_H
+#define SEQUENCER_H 1
+
+
+#include "hashtable.h"
+#include "segments.h"
+#include "table.h"
+#include "tm.h"
+
+
+/*
+ * Forward declarations only: this header never dereferences these types, it
+ * just stores pointers to them. The struct bodies are presumably defined in
+ * sequencer.c -- TODO confirm.
+ */
+typedef struct endInfoEntry endInfoEntry_t;
+typedef struct constructEntry constructEntry_t;
+
+
+/*
+ * State of one sequencing run: the input segments, the lookup structures
+ * used while matching them, and the resulting sequence string.
+ */
+typedef struct sequencer {
+
+/* public: */
+
+ /* Assembled result; presumably set by sequencer_run -- TODO confirm */
+ char* sequence;
+
+/* private: */
+
+ /* Input segments handed to sequencer_alloc (assumed; verify in sequencer.c) */
+ segments_t* segmentsPtr;
+
+ /* For removing duplicate segments */
+ hashtable_t* uniqueSegmentsPtr;
+
+ /* For matching segments */
+ endInfoEntry_t* endInfoEntries;
+ /* Array of table pointers, one table per array slot */
+ table_t** startHashToConstructEntryTables;
+
+ /* For constructing sequence */
+ constructEntry_t* constructEntries;
+ table_t* hashToConstructEntryTable;
+
+ /* For deallocation */
+ /* NOTE(review): looks like the bound used when freeing the table array above -- confirm */
+ long segmentLength;
+
+} sequencer_t;
+
+
+/*
+ * Argument bundle for a sequencer run.
+ * NOTE(review): sequencer_run is declared below as taking a plain void*;
+ * confirm whether callers still pass this struct or a sequencer_t* directly.
+ */
+typedef struct sequencer_run_arg {
+ sequencer_t* sequencerPtr;
+ segments_t* segmentsPtr;
+ long preAllocLength;
+ char* returnSequence; /* variable stores return value */
+} sequencer_run_arg_t;
+
+
+/* =============================================================================
+ * sequencer_alloc
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+sequencer_t*
+sequencer_alloc (long geneLength, long segmentLength, segments_t* segmentsPtr);
+
+
+/* =============================================================================
+ * sequencer_run
+ * =============================================================================
+ */
+
+void
+sequencer_run (void* argPtr);
+
+
+/* =============================================================================
+ * sequencer_free
+ * =============================================================================
+ */
+void
+sequencer_free (sequencer_t* sequencerPtr);
+
+
+#endif /* SEQUENCER_H */
+
+
+/* =============================================================================
+ *
+ * End of sequencer.h
+ *
+ * =============================================================================
+ */
--- /dev/null
+#include <stdio.h>
+
+/*
+ * Prints a function-local static counter, doubles it, and prints it again.
+ * The counter starts at 1 and keeps its value between calls.
+ */
+void function() {
+    static int myInt = 1;
+
+    printf("myInt:%d\n", myInt);   /* value carried over from the last call */
+    myInt = myInt * 2;
+    printf("myInt:%d\n", myInt);   /* value after doubling */
+}
+
+/* Calls function() three times in a row; the arguments are not used. */
+int main(int argc,char *argv[])
+{
+    int call;
+
+    for (call = 0; call < 3; call++) {
+        function();
+    }
+}
--- /dev/null
+#include <stdio.h>
+
+/*
+ * Prints a function-local static counter, doubles it, and prints it again.
+ * The counter starts at 0, so doubling leaves it at 0 on every call.
+ */
+void function() {
+    static int myInt = 0;
+
+    printf("myInt:%d\n", myInt);   /* value carried over from the last call */
+    myInt = myInt * 2;
+    printf("myInt:%d\n", myInt);   /* value after doubling */
+}
+
+/* Calls function() three times in a row; the arguments are not used. */
+int main(int argc,char *argv[])
+{
+    int call;
+
+    for (call = 0; call < 3; call++) {
+        function();
+    }
+}
--- /dev/null
+/* =============================================================================
+ *
+ * table.c
+ * -- Fixed-size hash table
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include "list.h"
+#include "table.h"
+#include "types.h"
+
+
+/* =============================================================================
+ * table_alloc
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+/*
+ * Allocates a table with numBucket buckets, each an empty list created with
+ * list_alloc(compare).
+ *
+ * Returns NULL if numBucket is not positive or if any allocation fails. On
+ * failure everything allocated so far is released, so nothing is leaked.
+ */
+table_t*
+table_alloc (long numBucket, long (*compare)(const void*, const void*))
+{
+    table_t* tablePtr;
+    long i;
+
+    /* Insert/remove compute hash % numBucket, so zero buckets is unusable */
+    if (numBucket <= 0) {
+        return NULL;
+    }
+
+    tablePtr = (table_t*)malloc(sizeof(table_t));
+    if (tablePtr == NULL) {
+        return NULL;
+    }
+
+    tablePtr->buckets = (list_t**)malloc(numBucket * sizeof(list_t*));
+    if (tablePtr->buckets == NULL) {
+        free(tablePtr);
+        return NULL;
+    }
+
+    for (i = 0; i < numBucket; i++) {
+        tablePtr->buckets[i] = list_alloc(compare);
+        if (tablePtr->buckets[i] == NULL) {
+            /* Undo the (still empty) lists created before the failure */
+            while (--i >= 0) {
+                list_free(tablePtr->buckets[i]);
+            }
+            free(tablePtr->buckets);
+            free(tablePtr);
+            return NULL;
+        }
+    }
+
+    tablePtr->numBucket = numBucket;
+
+    return tablePtr;
+}
+
+
+/* =============================================================================
+ * table_insert
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+/*
+ * Adds dataPtr to the bucket selected by hash % numBucket and reports
+ * whether the underlying list accepted it.
+ */
+bool_t
+table_insert (table_t* tablePtr, ulong_t hash, void* dataPtr)
+{
+    long bucketIndex = hash % tablePtr->numBucket;
+    list_t* bucketPtr = tablePtr->buckets[bucketIndex];
+
+    return (list_insert(bucketPtr, dataPtr) ? TRUE : FALSE);
+}
+
+
+/* =============================================================================
+ * TMtable_insert
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+/*
+ * Same bucket selection as table_insert, but the element is added through
+ * TMLIST_INSERT instead of list_insert.
+ */
+bool_t
+TMtable_insert (TM_ARGDECL table_t* tablePtr, ulong_t hash, void* dataPtr)
+{
+    long bucketIndex = hash % tablePtr->numBucket;
+    bool_t result = TRUE;
+
+    if (!TMLIST_INSERT(tablePtr->buckets[bucketIndex], dataPtr)) {
+        result = FALSE;
+    }
+
+    return result;
+}
+
+
+/* =============================================================================
+ * table_remove
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+/*
+ * Removes dataPtr from the bucket selected by hash % numBucket and reports
+ * whether the underlying list removal succeeded.
+ */
+bool_t
+table_remove (table_t* tablePtr, ulong_t hash, void* dataPtr)
+{
+    long bucketIndex = hash % tablePtr->numBucket;
+    list_t* bucketPtr = tablePtr->buckets[bucketIndex];
+
+    if (list_remove(bucketPtr, dataPtr)) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+/* =============================================================================
+ * table_free
+ * =============================================================================
+ */
+/*
+ * Releases the table object and its bucket array. The per-bucket lists are
+ * still not freed (see the disabled section below). Passing NULL is a no-op.
+ */
+void
+table_free (table_t* tablePtr)
+{
+    /* The original free(tablePtr) tolerated NULL; keep that contract */
+    if (tablePtr == NULL) {
+        return;
+    }
+
+#if 0
+    /* TODO: fix mixed sequential/parallel allocation */
+    {
+        long i;
+
+        for (i = 0; i < tablePtr->numBucket; i++) {
+            list_free(tablePtr->buckets[i]);
+        }
+    }
+#endif
+
+    /* The bucket array comes from plain malloc in table_alloc */
+    free(tablePtr->buckets);
+    free(tablePtr);
+}
+
+
+/* =============================================================================
+ * TEST_TABLE
+ * =============================================================================
+ */
+#ifdef TEST_TABLE
+
+
+#include <stdio.h>
+
+
+/*
+ * Debug helper: prints every bucket index followed by the long values its
+ * list elements point to.
+ */
+static void
+printTable (table_t* tablePtr)
+{
+    long i;
+
+    for (i = 0; i < tablePtr->numBucket; i++) {
+        list_t* bucketPtr = tablePtr->buckets[i];
+        list_iter_t it;
+
+        /* i is a long, so the conversion needs the 'l' length modifier */
+        printf("%2li: [", i);
+        list_iter_reset(&it, bucketPtr);
+        while (list_iter_hasNext(&it, bucketPtr)) {
+            printf("%li ", *(long*)list_iter_next(&it, bucketPtr));
+        }
+        puts("]");
+    }
+}
+
+
+/*
+ * Test driver: inserts each value of hash[] (up to the -1 terminator) into an
+ * 8-bucket table, then removes them again, printing the table after every
+ * step.
+ */
+int
+main ()
+{
+    table_t* tablePtr;
+    long hash[] = {3, 1, 4, 1, 5, 9, 2, 6, 8, 7, -1};
+    long i;
+    bool_t status;
+
+    status = memory_init(1, 4, 2);
+    assert(status);
+
+    puts("Starting...");
+
+    tablePtr = table_alloc(8, NULL);
+    assert(tablePtr);
+
+    /* The address of each array slot is the stored element */
+    for (i = 0; hash[i] >= 0; i++) {
+        status = table_insert(tablePtr,
+                              (ulong_t)hash[i],
+                              (void*)&hash[i]);
+        assert(status);
+        printTable(tablePtr);
+        puts("");
+    }
+
+    for (i = 0; hash[i] >= 0; i++) {
+        status = table_remove(tablePtr,
+                              (ulong_t)hash[i],
+                              (void*)&hash[i]);
+        assert(status);
+        printTable(tablePtr);
+        puts("");
+    }
+
+    table_free(tablePtr);
+
+    puts("Done.");
+
+    return 0;
+}
+
+
+#endif /* TEST_TABLE */
+
+
+/* =============================================================================
+ *
+ * End of table.c
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * table.h
+ * -- Fixed-size hash table
+ *
+ * =============================================================================
+ *
+ * Copyright (C) Stanford University, 2006. All Rights Reserved.
+ * Author: Chi Cao Minh
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+
+#ifndef TABLE_H
+#define TABLE_H 1
+
+
+#include "list.h"
+#include "types.h"
+
+
+typedef struct table {
+ list_t** buckets; /* pointers to list_t; presumably one list per bucket, numBucket entries -- TODO confirm in table.c */
+ long numBucket; /* bucket count; presumably the value passed to table_alloc -- TODO confirm in table.c */
+} table_t;
+
+
+/* =============================================================================
+ * table_alloc
+ * -- 'numBucket' is the requested number of buckets
+ * -- 'compare' is a callback taking two const void* and returning long;
+ *    presumably handed to the per-bucket lists to order/match elements
+ *    (TODO confirm in table.c)
+ * -- Returns a pointer to the new table_t
+ * -- Returns NULL on failure
+ * =============================================================================
+ */
+table_t*
+table_alloc (long numBucket, long (*compare)(const void*, const void*));
+
+
+/* =============================================================================
+ * table_insert
+ * -- Inserts 'dataPtr' into 'tablePtr'; this is the variant without the
+ *    TM_ARGDECL parameter (see TMtable_insert for the one that takes it)
+ * -- 'hash' is supplied by the caller; presumably it selects the bucket
+ *    (TODO confirm in table.c)
+ * -- NOTE(review): the build defines LIST_NO_DUPLICATES, so FALSE may also
+ *    mean the element was already present -- confirm against list.c
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+table_insert (table_t* tablePtr, ulong_t hash, void* dataPtr);
+
+
+/* =============================================================================
+ * TMtable_insert
+ * -- Same 'tablePtr', 'hash' and 'dataPtr' parameters as table_insert,
+ *    preceded by TM_ARGDECL (a macro defined outside this header;
+ *    presumably the transactional-memory context -- TODO confirm)
+ * -- Can be called through the TMTABLE_INSERT macro in this header, which
+ *    supplies the matching TM_ARG
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+TMtable_insert (TM_ARGDECL table_t* tablePtr, ulong_t hash, void* dataPtr);
+
+
+/* =============================================================================
+ * table_remove
+ * -- Removes 'dataPtr' from 'tablePtr'
+ * -- 'hash' is supplied by the caller; presumably it must be the same value
+ *    that was used when the element was inserted (TODO confirm in table.c)
+ * -- Returns TRUE if successful, else FALSE
+ * =============================================================================
+ */
+bool_t
+table_remove (table_t* tablePtr, ulong_t hash, void* dataPtr);
+
+
+/* =============================================================================
+ * table_free
+ * -- Releases the table referenced by 'tablePtr'; returns nothing
+ * -- NOTE(review): whether the stored data pointers are released as well is
+ *    not visible from this header -- confirm in table.c
+ * =============================================================================
+ */
+void
+table_free (table_t* tablePtr);
+
+
+#define TMTABLE_INSERT(t, h, d) TMtable_insert(TM_ARG t, h, d) /* call-site wrapper: prepends TM_ARG to the argument list */
+
+
+#endif /* TABLE_H */
+
+
+/* =============================================================================
+ *
+ * End of table.h
+ *
+ * =============================================================================
+ */