1 package edu.uci.iotproject.comparison.seqalignment;
4 * A generic implementation of the sequence alignment algorithm given in Kleinberg's and Tardos' "Algorithm Design".
5 * This implementation is the basic version. There is a more complex version which significantly reduces the space
6 * complexity at a slight cost to time complexity.
8 * @param <ALIGNMENT_UNIT> The <em>unit of the alignment</em>, or, in other words, the <em>granularity</em> of the
9 * alignment. For example, for 'classical' string alignment (as in sequence alignment where we
10 * try to align two strings character by character -- the example most often used in books on
11 * algorithms) this would be a {@link Character}. As a second example, by specifying
12 * {@link String}, one can decrease the granularity so as to align <em>blocks</em> of characters
13 * (e.g., if one wants to align to two string arrays).
15 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
16 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
18 public class SequenceAlignment<ALIGNMENT_UNIT> {
22 * Provides the cost of aligning two {@link ALIGNMENT_UNIT}s with one another as well as the cost of aligning an
23 * {@link ALIGNMENT_UNIT} with a gap.
25 private final AlignmentPricer<ALIGNMENT_UNIT> mAlignmentPricer;
28 * Constructs a new {@link SequenceAlignment}. The new instance relies on the provided {@code alignmentPricer} to
29 * provide the cost of aligning two {@link ALIGNMENT_UNIT}s as well as the cost of aligning an
30 * {@link ALIGNMENT_UNIT} with a gap.
32 * @param alignmentPricer An {@link AlignmentPricer} that provides the cost of aligning two {@link ALIGNMENT_UNIT}s
33 * with one another as well as the cost of aligning an {@link ALIGNMENT_UNIT} with a gap.
35 public SequenceAlignment(AlignmentPricer<ALIGNMENT_UNIT> alignmentPricer) {
36 mAlignmentPricer = alignmentPricer;
41 * Calculates the cost of aligning {@code sequence1} with {@code sequence2}.
43 * @param sequence1 A sequence that is to be aligned with {@code sequence2}.
44 * @param sequence2 A sequence that is to be aligned with {@code sequence1}.
46 * @return The cost of aligning {@code sequence1} with {@code sequence2}.
48 public int calculateAlignment(ALIGNMENT_UNIT[] sequence1, ALIGNMENT_UNIT[] sequence2) {
49 int[][] costs = new int[sequence1.length + 1][sequence2.length +1];
52 * This is a homebrewn initialization; it is different from the one in the Kleinberg book - is it correct?
53 * It tries to add support for *different* gap costs depending on the input (e.g., such that one can say that
54 * matching a 'c' with a gap is more expensive than matching a 'b' with a gap).
56 for (int i = 1; i <= sequence1.length; i++) {
57 costs[i][0] = mAlignmentPricer.alignmentCost(sequence1[i-1], null) + costs[i-1][0];
59 for (int j = 1; j <= sequence2.length; j++) {
60 costs[0][j] = mAlignmentPricer.alignmentCost(sequence2[j-1], null) + costs[0][j-1];
62 for (int j = 1; j <= sequence2.length; j++) {
63 for (int i = 1; i <= sequence1.length; i++) {
64 // The cost when current items of both sequences are aligned.
65 int costAligned = mAlignmentPricer.alignmentCost(sequence2[j-1], sequence1[i-1]) + costs[i-1][j-1];
66 // The cost when current item from sequence1 is not aligned (it's matched with a gap)
67 int seq1ItemNotMached = mAlignmentPricer.alignmentCost(sequence1[i-1], null) + costs[i-1][j];
68 // The cost when current item from sequence2 is not aligned (it's matched with a gap)
69 int seq2ItemNotMached = mAlignmentPricer.alignmentCost(sequence2[j-1], null) + costs[i][j-1];
70 costs[i][j] = Math.min(costAligned, Math.min(seq1ItemNotMached, seq2ItemNotMached));
73 return costs[sequence1.length][sequence2.length];