--- /dev/null
+package edu.uci.iotproject.comparison.seqalignment;
+
+import java.util.function.ToIntBiFunction;
+import java.util.function.ToIntFunction;
+
+/**
+ * Provides a generic implementation for the calculation of the cost of aligning two elements of a sequence as part of
+ * the sequence alignment algorithm (the algorithm is implemented in {@link SequenceAlignment}).
+ *
+ * @param <T> The type of the elements that are being aligned.
+ *
+ * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
+ * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
+ */
+public class AlignmentPricer<T> {
+
+    /**
+     * A function that provides the cost of aligning a {@code T} with a gap.
+     */
+    private final ToIntFunction<T> mGapCostFunction;
+
+    /**
+     * A function that provides the cost of aligning a {@code T} with some other {@code T}.
+     */
+    private final ToIntBiFunction<T, T> mAlignmentCostFunction;
+
+    /**
+     * Constructs a new {@link AlignmentPricer}.
+     *
+     * @param alignmentCostFunction A function that specifies the cost of aligning a {@code T} with some other
+     *                              {@code T} (e.g., based on the values of the properties of the two instances).
+     * @param gapCostFunction A function that specifies the cost of aligning a {@code T} with a gap. Note that the
+     *                        function is free to specify <em>different</em> gap costs for different {@code T}s.
+     * @throws NullPointerException If {@code alignmentCostFunction} or {@code gapCostFunction} is {@code null}.
+     */
+    public AlignmentPricer(ToIntBiFunction<T, T> alignmentCostFunction, ToIntFunction<T> gapCostFunction) {
+        // Fail fast: a missing function would otherwise only surface as an NPE on the first cost lookup.
+        mAlignmentCostFunction = java.util.Objects.requireNonNull(alignmentCostFunction, "alignmentCostFunction");
+        mGapCostFunction = java.util.Objects.requireNonNull(gapCostFunction, "gapCostFunction");
+    }
+
+    /**
+     * Calculates the cost of aligning {@code item1} with {@code item2}. A {@code null} argument represents a gap:
+     * if exactly one of the two arguments is {@code null}, the cost of aligning the other argument with a gap is
+     * returned. Both arguments cannot be {@code null} at the same time, as that translates to aligning a gap with
+     * a gap, which is pointless.
+     *
+     * @param item1 The first of the two aligned objects. Set to {@code null} to calculate the cost of aligning
+     *              {@code item2} with a gap.
+     * @param item2 The second of the two aligned objects. Set to {@code null} to calculate the cost of aligning
+     *              {@code item1} with a gap.
+     * @return The cost of aligning {@code item1} with {@code item2}.
+     * @throws IllegalArgumentException If both {@code item1} and {@code item2} are {@code null}.
+     */
+    public int alignmentCost(T item1, T item2) {
+        if (item1 == null && item2 == null) {
+            // A gap aligned with a gap carries no information (the caller might as well remove both gaps).
+            throw new IllegalArgumentException("Both arguments cannot be null: you are aligning a gap with a gap!");
+        }
+        // Exactly one null argument: the other item is aligned with a gap, so ask the gap cost function.
+        if (item1 == null) {
+            return mGapCostFunction.applyAsInt(item2);
+        }
+        if (item2 == null) {
+            return mGapCostFunction.applyAsInt(item1);
+        }
+        // Both items are present: delegate to the provided alignment cost function.
+        return mAlignmentCostFunction.applyAsInt(item1, item2);
+    }
+
+}
--- /dev/null
+package edu.uci.iotproject.comparison.seqalignment;
+
+/**
+ * A generic implementation of the sequence alignment algorithm given in Kleinberg's and Tardos' "Algorithm Design".
+ * This implementation is the basic version. There is a more complex version which significantly reduces the space
+ * complexity at a slight cost to time complexity.
+ *
+ * @param <ALIGNMENT_UNIT> The <em>unit of the alignment</em>, or, in other words, the <em>granularity</em> of the
+ * alignment. For example, for 'classical' string alignment (as in sequence alignment where we
+ * try to align two strings character by character -- the example most often used in books on
+ * algorithms) this would be a {@link Character}. As a second example, by specifying
+ * {@link String}, one can decrease the granularity so as to align <em>blocks</em> of characters
+ * (e.g., if one wants to align two string arrays).
+ *
+ * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
+ * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
+ */
+public class SequenceAlignment<ALIGNMENT_UNIT> {
+
+    /**
+     * Provides the cost of aligning two {@code ALIGNMENT_UNIT}s with one another as well as the cost of aligning an
+     * {@code ALIGNMENT_UNIT} with a gap.
+     */
+    private final AlignmentPricer<ALIGNMENT_UNIT> mAlignmentPricer;
+
+    /**
+     * Constructs a new {@link SequenceAlignment}. The new instance relies on the provided {@code alignmentPricer} to
+     * provide the cost of aligning two {@code ALIGNMENT_UNIT}s as well as the cost of aligning an
+     * {@code ALIGNMENT_UNIT} with a gap.
+     *
+     * @param alignmentPricer An {@link AlignmentPricer} that provides the cost of aligning two
+     *                        {@code ALIGNMENT_UNIT}s with one another as well as the cost of aligning an
+     *                        {@code ALIGNMENT_UNIT} with a gap.
+     * @throws NullPointerException If {@code alignmentPricer} is {@code null}.
+     */
+    public SequenceAlignment(AlignmentPricer<ALIGNMENT_UNIT> alignmentPricer) {
+        mAlignmentPricer = java.util.Objects.requireNonNull(alignmentPricer, "alignmentPricer");
+    }
+
+    /**
+     * Calculates the minimum cost of aligning {@code sequence1} with {@code sequence2}.
+     *
+     * <p>The full cost table is kept in memory, so both running time and space usage are proportional to
+     * {@code sequence1.length * sequence2.length}. The gap cost of every element is queried exactly once; the
+     * alignment cost is queried once per pair of elements, always with the element of {@code sequence1} as the
+     * first argument and the element of {@code sequence2} as the second.
+     *
+     * @param sequence1 A sequence that is to be aligned with {@code sequence2}.
+     * @param sequence2 A sequence that is to be aligned with {@code sequence1}.
+     *
+     * @return The cost of aligning {@code sequence1} with {@code sequence2}.
+     * @throws NullPointerException If {@code sequence1} or {@code sequence2} is {@code null}.
+     * @throws IllegalArgumentException If either sequence contains a {@code null} element ({@code null} is reserved
+     *                                  for representing a gap).
+     */
+    public int calculateAlignment(ALIGNMENT_UNIT[] sequence1, ALIGNMENT_UNIT[] sequence2) {
+        java.util.Objects.requireNonNull(sequence1, "sequence1");
+        java.util.Objects.requireNonNull(sequence2, "sequence2");
+        // The gap cost of an element does not depend on the cell being filled, so look each one up only once.
+        final int[] gapCosts1 = gapCosts(sequence1, "sequence1");
+        final int[] gapCosts2 = gapCosts(sequence2, "sequence2");
+
+        // costs[i][j] is the optimal cost of aligning the first i items of sequence1 with the first j items of
+        // sequence2.
+        final int[][] costs = new int[sequence1.length + 1][sequence2.length + 1];
+        /*
+         * Initialization with support for *different* gap costs depending on the element (e.g., such that one can
+         * say that matching a 'c' with a gap is more expensive than matching a 'b' with a gap). Aligning a prefix
+         * with the empty sequence leaves no choice but to pair every element of the prefix with a gap, so the cost
+         * is the sum of the gap costs of the elements in the prefix. For a constant gap cost d this reduces to the
+         * i * d initialization used in Kleinberg's and Tardos' presentation of the algorithm.
+         */
+        for (int i = 1; i <= sequence1.length; i++) {
+            costs[i][0] = costs[i - 1][0] + gapCosts1[i - 1];
+        }
+        for (int j = 1; j <= sequence2.length; j++) {
+            costs[0][j] = costs[0][j - 1] + gapCosts2[j - 1];
+        }
+        // Fill the table row by row; every cell only depends on its upper, left and upper-left neighbours.
+        for (int i = 1; i <= sequence1.length; i++) {
+            for (int j = 1; j <= sequence2.length; j++) {
+                // The cost when the current items of both sequences are aligned with one another.
+                int costAligned = mAlignmentPricer.alignmentCost(sequence1[i - 1], sequence2[j - 1])
+                        + costs[i - 1][j - 1];
+                // The cost when the current item from sequence1 is not aligned (it's matched with a gap).
+                int seq1ItemNotMatched = gapCosts1[i - 1] + costs[i - 1][j];
+                // The cost when the current item from sequence2 is not aligned (it's matched with a gap).
+                int seq2ItemNotMatched = gapCosts2[j - 1] + costs[i][j - 1];
+                costs[i][j] = Math.min(costAligned, Math.min(seq1ItemNotMatched, seq2ItemNotMatched));
+            }
+        }
+        return costs[sequence1.length][sequence2.length];
+    }
+
+    /**
+     * Looks up the cost of aligning each element of {@code sequence} with a gap.
+     *
+     * @param sequence The sequence whose per-element gap costs are to be determined.
+     * @param sequenceName The name of the sequence, only used in the exception message.
+     * @return An array in which index {@code k} holds the gap cost of {@code sequence[k]}.
+     * @throws IllegalArgumentException If {@code sequence} contains a {@code null} element.
+     */
+    private int[] gapCosts(ALIGNMENT_UNIT[] sequence, String sequenceName) {
+        final int[] gapCosts = new int[sequence.length];
+        for (int k = 0; k < sequence.length; k++) {
+            if (sequence[k] == null) {
+                // The pricer would reject this as "gap aligned with gap"; report the actual problem instead.
+                throw new IllegalArgumentException(sequenceName + " contains a null element at index " + k);
+            }
+            gapCosts[k] = mAlignmentPricer.alignmentCost(sequence[k], null);
+        }
+        return gapCosts;
+    }
+}
--- /dev/null
+package edu.uci.iotproject.test;
+
+import edu.uci.iotproject.comparison.seqalignment.AlignmentPricer;
+import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.function.ToIntBiFunction;
+import java.util.function.ToIntFunction;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests the implementation of {@link SequenceAlignment}.
+ *
+ * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
+ * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
+ */
+public class SequenceAlignmentTest {
+
+    private char[] lowercaseVowels;
+    private char[] lowercaseConsonants;
+
+    private Character[] meanChars;
+    private Character[] nameChars;
+
+    /**
+     * Cost function for the alignment of letters in the example execution of the sequence alignment algorithm in
+     * Kleinberg's and Tardos' "Algorithm Design", where 'mean' and 'name' are aligned.
+     */
+    private ToIntBiFunction<Character, Character> kleinbergExampleAlignmentCostFunc;
+
+    /**
+     * Cost function for the alignment of letters with gaps in the example execution of the sequence alignment
+     * algorithm in Kleinberg's and Tardos' "Algorithm Design", where 'mean' and 'name' are aligned. Gap cost is set
+     * to 2, regardless of input character.
+     */
+    private ToIntFunction<Character> kleinbergExampleGapCostFunc;
+
+    /**
+     * Calculates the cost of aligning a letter with another letter or a letter with a gap according to the cost
+     * recipe used in the example in Kleinberg and Tardos.
+     */
+    private AlignmentPricer<Character> kleinbergAlignmentPricer;
+
+    /**
+     * Executes the sequence alignment algorithm using the cost function defined in the example in Kleinberg and
+     * Tardos, i.e., {@link #kleinbergAlignmentPricer}.
+     */
+    private SequenceAlignment<Character> kleinbergSequenceAligner;
+
+    @Before
+    public void initialize() {
+        // We consider 'y' a vowel for the sake of simplicity.
+        // Note: we assume an all lowercase string!
+        lowercaseVowels = new char[] { 'a', 'e', 'i', 'o', 'u', 'y' };
+        lowercaseConsonants = new char[] { 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's',
+                't', 'v', 'w', 'x', 'z' };
+
+        kleinbergExampleAlignmentCostFunc = this::kleinbergAlignmentCost;
+        // The cost of a gap is 2, regardless of what letter is aligned with the gap.
+        kleinbergExampleGapCostFunc = c -> 2;
+
+        meanChars = toCharacters("mean");
+        nameChars = toCharacters("name");
+
+        kleinbergAlignmentPricer = new AlignmentPricer<>(kleinbergExampleAlignmentCostFunc,
+                kleinbergExampleGapCostFunc);
+        kleinbergSequenceAligner = new SequenceAlignment<>(kleinbergAlignmentPricer);
+    }
+
+    @Test
+    public void kleinbergExampleOptAlignmentCostShouldBe6() {
+        // Cost of the optimal alignment of the two words used in the book's example.
+        assertOptimalCost("Kleinberg example ('mean' aligned with 'name')", 6, meanChars, nameChars);
+    }
+
+    @Test
+    public void meanAlignedWithEmptyStringShouldBe8() {
+        // 'mean' aligned with the empty string equals paying four gap costs, so total cost is: 4 * 2 = 8.
+        assertOptimalCost("'mean' aligned with empty string", 8, meanChars, new Character[0]);
+    }
+
+    @Test
+    public void mAlignedWithNameShouldBe6() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         * n a m e
+         * _ _ m _
+         * This should have a cost of 3 * gapCost = 6
+         */
+        assertOptimalCost("'m' aligned with 'name'", 6, toCharacters("m"), nameChars);
+    }
+
+    @Test
+    public void meAlignedWithNameShouldBe4() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         * n a m e
+         * _ _ m e
+         * This should have a cost of 2 * gapCost = 4
+         */
+        final Character[] meChars = toCharacters("me");
+        assertOptimalCost("'me' aligned with 'name'", 4, meChars, nameChars);
+        // Check that the order of the arguments doesn't matter.
+        assertOptimalCost("'name' aligned with 'me' (arguments swapped)", 4, nameChars, meChars);
+    }
+
+    @Test
+    public void ameAlignedWithNameShouldBe2() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         * n a m e
+         * _ a m e
+         * This should have a cost of 1 * gapCost = 2
+         */
+        assertOptimalCost("'ame' aligned with 'name'", 2, toCharacters("ame"), nameChars);
+    }
+
+    @Test
+    public void fameAlignedWithNameShouldBe1() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         * n a m e
+         * f a m e
+         * This should have a cost of 1 * consonantMatchedWithConsonantCost = 1
+         */
+        assertOptimalCost("'fame' aligned with 'name'", 1, toCharacters("fame"), nameChars);
+    }
+
+    @Test
+    public void nameAlignedWithNameShouldBe0() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         * n a m e
+         * n a m e
+         * This should have a cost of 0.
+         */
+        assertOptimalCost("'name' aligned with 'name'", 0, toCharacters("name"), nameChars);
+    }
+
+    @Test
+    public void emanAlignedWithNameShouldBe6() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         *
+         * _ n a m e
+         * e m a n _
+         *
+         * or
+         *
+         * n a m e _
+         * _ e m a n
+         *
+         * Both pay two gaps plus two mismatches between letters of the same kind (consonant/consonant in the first
+         * alignment, vowel/vowel in the second), i.e., 2 * 2 + 2 * 1 = 6.
+         */
+        assertOptimalCost("'eman' aligned with 'name'", 6, toCharacters("eman"), nameChars);
+    }
+
+    @Test
+    public void naemAlignedWithNameShouldBe4() {
+        /*
+         * Note: this also uses the cost function specified in Kleinberg and Tardos.
+         * Best alignment should be:
+         *
+         * n a _ m e
+         * n a e m _
+         *
+         * or
+         *
+         * n a m e _
+         * n a _ e m
+         *
+         * This should have a cost of 2 * gapCost = 4.
+         */
+        assertOptimalCost("'naem' aligned with 'name'", 4, toCharacters("naem"), nameChars);
+    }
+
+    /**
+     * Aligns {@code sequence1} with {@code sequence2} using {@link #kleinbergSequenceAligner} and asserts that the
+     * computed cost equals {@code expectedCost}. The computed and expected values are reported by JUnit on failure.
+     *
+     * @param description A short description of the aligned inputs, used as prefix of the failure message.
+     * @param expectedCost The expected cost of the optimal alignment.
+     * @param sequence1 The first sequence passed to the aligner.
+     * @param sequence2 The second sequence passed to the aligner.
+     */
+    private void assertOptimalCost(String description, int expectedCost, Character[] sequence1,
+                                   Character[] sequence2) {
+        final int computedCost = kleinbergSequenceAligner.calculateAlignment(sequence1, sequence2);
+        assertEquals(description + ": computed opt != expected opt", expectedCost, computedCost);
+    }
+
+    /**
+     * Converts {@code text} to an array holding one boxed {@link Character} per {@code char} of the string.
+     * String -> Character[] conversion courtesy of https://stackoverflow.com/a/27690990/1214974
+     *
+     * @param text The string to convert.
+     * @return The characters of {@code text}, in order.
+     */
+    private static Character[] toCharacters(String text) {
+        return text.chars().mapToObj(c -> (char) c).toArray(Character[]::new);
+    }
+
+    /**
+     * The letter-to-letter alignment cost used in the Kleinberg and Tardos example: identical letters cost 0, two
+     * different vowels or two different consonants cost 1, and a vowel aligned with a consonant costs 3. Fails the
+     * running test if two different characters are compared and one of them is not a lowercase letter.
+     *
+     * @param char1 The first of the two aligned letters.
+     * @param char2 The second of the two aligned letters.
+     * @return The cost of aligning {@code char1} with {@code char2}.
+     */
+    private int kleinbergAlignmentCost(char char1, char char2) {
+        // If char1 and char2 are the same characters, the cost of aligning them is 0.
+        if (char1 == char2) {
+            return 0;
+        }
+
+        final boolean char1IsVowel = isVowel(char1);
+        final boolean char1IsConsonant = isConsonant(char1);
+        final boolean char2IsVowel = isVowel(char2);
+        final boolean char2IsConsonant = isConsonant(char2);
+
+        // Alignment cost is undefined for non alphabet characters.
+        if (!char1IsVowel && !char1IsConsonant) {
+            fail("not an alphabet letter: " + char1);
+        }
+        if (!char2IsVowel && !char2IsConsonant) {
+            fail("not an alphabet letter: " + char2);
+        }
+
+        // If char1 and char2 are both vowels or both consonants, the cost is 1.
+        if (char1IsVowel && char2IsVowel || char1IsConsonant && char2IsConsonant) {
+            return 1;
+        }
+
+        // If one of char1 and char2 is a consonant, while the other is a vowel, the cost is 3.
+        return 3;
+    }
+
+    /**
+     * Checks if {@code letter} is a lowercase vowel. Note: for simplicity, 'y' is considered a <em>vowel</em>.
+     * @param letter A {@code char} expected to be a vowel.
+     * @return {@code true} if {@code letter} is a vowel, {@code false} otherwise.
+     */
+    private boolean isVowel(char letter) {
+        for (char vowel : lowercaseVowels) {
+            if (letter == vowel) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Checks if {@code letter} is a lowercase consonant. Note: for simplicity, 'y' is considered a <em>vowel</em>.
+     * @param letter A {@code char} expected to be a consonant.
+     * @return {@code true} if {@code letter} is a consonant, {@code false} otherwise.
+     */
+    private boolean isConsonant(char letter) {
+        for (char consonant : lowercaseConsonants) {
+            if (letter == consonant) {
+                return true;
+            }
+        }
+        return false;
+    }
+}