SequenceExtraction.java: finish up naive implementation of extract(...) and move to...
authorJanus Varmarken <varmarken@gmail.com>
Tue, 21 Aug 2018 22:11:08 +0000 (15:11 -0700)
committerJanus Varmarken <varmarken@gmail.com>
Tue, 21 Aug 2018 22:11:08 +0000 (15:11 -0700)
Main.java: add code for exploring the number of false positives for sequence alignment.
TcpConversationUtils.java: move implementation of getPacketLengthSequence(Conversation) to this class from SequenceExtraction as it is a generally useful util method.

Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java [deleted file]
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java [new file with mode: 0644]
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java [new file with mode: 0644]

index 2f5a74ed12c7b0de9311c78f27dca4000e476a93..f19b7b2125ebbac1ec1275677f2caf3326b11d7f 100644 (file)
@@ -6,6 +6,9 @@ import edu.uci.iotproject.analysis.TcpConversationUtils;
 import edu.uci.iotproject.analysis.TrafficLabeler;
 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
 import edu.uci.iotproject.analysis.UserAction;
+import edu.uci.iotproject.comparison.seqalignment.ExtractedSequence;
+import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment;
+import edu.uci.iotproject.comparison.seqalignment.SequenceExtraction;
 import edu.uci.iotproject.io.TriggerTimesFileReader;
 import org.pcap4j.core.*;
 import org.pcap4j.packet.namednumber.DataLinkType;
@@ -34,8 +37,8 @@ public class Main {
         // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------
         // Paths to input and output files (consider supplying these as arguments instead) and IP of the device for
         // which traffic is to be extracted:
-        String path = "/scratch/July-2018"; // Rahmadi
-        //String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
+//        String path = "/scratch/July-2018"; // Rahmadi
+        String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
 
         // 1) D-Link July 26 experiment
 //        final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
@@ -52,10 +55,10 @@ public class Main {
         // 2b) TP-Link July 25 experiment TRUNCATED:
         // Only contains "true local" events, i.e., before the behavior changes to remote-like behavior.
         // Last included event is at July 25 10:38:11; file filtered to only include packets with arrival time <= 10:38:27.
-//        final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap";
-//        final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap";
-//        final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps";
-//        final String deviceIp = "192.168.1.159";
+        final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap";
+        final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap";
+        final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps";
+        final String deviceIp = "192.168.1.159";
 
         // 3) SmartThings Plug July 25 experiment
 //        final String inputPcapFile = path + "/2018-07/stplug/stplug.wlan1.local.pcap";
@@ -135,10 +138,10 @@ public class Main {
 //        final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps";
 //        final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
         // August 17
-        final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap";
-        final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap";
-        final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps";
-        final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
+//        final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap";
+//        final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap";
+//        final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps";
+//        final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
 
         TriggerTimesFileReader ttfr = new TriggerTimesFileReader();
         List<Instant> triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false);
@@ -212,7 +215,45 @@ public class Main {
             });
         });
 
+        // ================================================================================================
+        // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
+        //
+        // Currently need to know relevant hostname in advance :(
+        String hostname = "events.tplinkra.com";
+        // Conversations with 'hostname' for ON events.
+        List<Conversation> onsForHostname = new ArrayList<>();
+        // Conversations with 'hostname' for OFF events.
+        List<Conversation> offsForHostname = new ArrayList<>();
+        // "Unwrap" sequence groupings in ons/offs maps.
+        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
+        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
+        // Extract representative sequence for ON and OFF by providing the list of conversations with
+        // 'hostname' observed for each event type (the training data).
+        SequenceExtraction seqExtraction = new SequenceExtraction();
+        ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
+        ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
+        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
+        // labeled).
+        int onsLabeledAsOff = 0;
+        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
+        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
+        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
+        for (Conversation c : onsForHostname) {
+            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
+                onsLabeledAsOff++;
+            }
+        }
+        int offsLabeledAsOn = 0;
+        for (Conversation c : offsForHostname) {
+            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
+                offsLabeledAsOn++;
+            }
+        }
         System.out.println("");
+        // ================================================================================================
+
 
         // -------------------------------------------------------------------------------------------------------------
         // -------------------------------------------------------------------------------------------------------------
diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java
deleted file mode 100644 (file)
index bb1e210..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-package edu.uci.iotproject;
-
-import edu.uci.iotproject.comparison.seqalignment.AlignmentPricer;
-import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment;
-import org.pcap4j.core.PcapPacket;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * TODO add class documentation.
- *
- * @author Janus Varmarken
- */
-public class SequenceExtraction {
-
-
-    private final SequenceAlignment<Integer> mAlignmentAlg;
-
-
-    public SequenceExtraction() {
-        mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10));
-    }
-
-
-    public SequenceExtraction(SequenceAlignment<Integer> alignmentAlgorithm) {
-        mAlignmentAlg = alignmentAlgorithm;
-    }
-
-    // Initial
-//    /**
-//     *
-//     * @param convsForAction A set of {@link Conversation}s known to be associated with a single type of user action.
-//     */
-//    public void extract(List<Conversation> convsForAction) {
-//        int maxDifference = 0;
-//
-//        for (int i = 0; i < convsForAction.size(); i++) {
-//            for (int j = i+1; j < convsForAction.size(); i++) {
-//                Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
-//                Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
-//                int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
-//                if (alignmentCost > maxDifference) {
-//                    maxDifference = alignmentCost;
-//                }
-//            }
-//        }
-//
-//    }
-
-
-//    public void extract(Map<String, List<Conversation>> hostnameToConvs) {
-//        int maxDifference = 0;
-//
-//        for (int i = 0; i < convsForAction.size(); i++) {
-//            for (int j = i+1; j < convsForAction.size(); i++) {
-//                Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
-//                Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
-//                int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
-//                if (alignmentCost > maxDifference) {
-//                    maxDifference = alignmentCost;
-//                }
-//            }
-//        }
-//
-//    }
-
-    private Integer[] getPacketLengthSequence(Conversation c) {
-        List<PcapPacket> packets = c.getPackets();
-        Integer[] packetLengthSequence = new Integer[packets.size()];
-        for (int i = 0; i < packetLengthSequence.length; i++) {
-            packetLengthSequence[i] = packets.get(i).length();
-        }
-        return packetLengthSequence;
-    }
-}
index a27e5fcbaa12eb2608c712110ac09943d10f7094..2b172bcbb9c29fa95f37a681b2959f6b318deafc 100644 (file)
@@ -255,6 +255,21 @@ public class TcpConversationUtils {
         return result;
     }
 
+    /**
+     * Maps a {@link Conversation} to the sequence of lengths of its packets.
+     * @param c The {@link Conversation} whose packets are to be mapped to their lengths.
+     * @return An {@code Integer[]} holding the value of {@code getOriginalLength()} for every packet in
+     *         {@code c.getPackets()}, in the order in which that list yields the packets.
+     *         NOTE(review): presumably these are the payload-carrying packets ordered by timestamp; confirm
+     *         against {@link Conversation#getPackets()}.
+     */
+    public static Integer[] getPacketLengthSequence(Conversation c) {
+        return c.getPackets()
+                .stream()
+                .map(PcapPacket::getOriginalLength)
+                .toArray(Integer[]::new);
+    }
+
     /**
      * Appends a space to {@code sb} <em>iff</em> {@code sb} already contains some content.
      * @param sb A {@link StringBuilder} that should have a space appended <em>iff</em> it is not empty.
diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java
new file mode 100644 (file)
index 0000000..423e3c8
--- /dev/null
@@ -0,0 +1,70 @@
+package edu.uci.iotproject.comparison.seqalignment;
+
+import edu.uci.iotproject.Conversation;
+
+/**
+ * Holds the outcome of a sequence extraction: a representative {@link Conversation} and the maximum alignment cost
+ * associated with it.
+ *
+ * @author Janus Varmarken
+ */
+public class ExtractedSequence {
+
+    /**
+     * The {@link Conversation} chosen as the representative.
+     */
+    private final Conversation mRepresentativeSequence;
+
+    /**
+     * The maximum alignment cost supplied together with the representative.
+     */
+    private final int mMaxAlignmentCost;
+
+    /**
+     * The original lengths of the representative's packets, separated by single spaces.
+     */
+    private final String mSequenceString;
+
+    /**
+     * Creates a new {@code ExtractedSequence}.
+     * @param sequence the representative {@link Conversation}; must not be {@code null} as its packets are read here.
+     * @param maxAlignmentCost the maximum alignment cost to associate with {@code sequence}.
+     */
+    public ExtractedSequence(Conversation sequence, int maxAlignmentCost) {
+        mRepresentativeSequence = sequence;
+        mMaxAlignmentCost = maxAlignmentCost;
+        StringBuilder sb = new StringBuilder();
+        sequence.getPackets().forEach(p -> {
+            if (sb.length() != 0) {
+                sb.append(" ");
+            }
+            sb.append(p.getOriginalLength());
+        });
+        mSequenceString = sb.toString();
+    }
+
+    /**
+     * Gets the representative {@link Conversation}.
+     * @return the representative {@link Conversation}.
+     */
+    public Conversation getRepresentativeSequence() {
+        return mRepresentativeSequence;
+    }
+
+    /**
+     * Gets the maximum alignment cost associated with the representative.
+     * @return the maximum alignment cost.
+     */
+    public int getMaxAlignmentCost() {
+        return mMaxAlignmentCost;
+    }
+
+    /**
+     * Gets the representative's packet lengths as a single string.
+     * @return the original packet lengths of the representative, separated by single spaces.
+     */
+    public String getSequenceString() {
+        return mSequenceString;
+    }
+
+}
diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java
new file mode 100644 (file)
index 0000000..8003670
--- /dev/null
@@ -0,0 +1,108 @@
+package edu.uci.iotproject.comparison.seqalignment;
+
+import edu.uci.iotproject.Conversation;
+import edu.uci.iotproject.analysis.TcpConversationUtils;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Extracts a representative {@link Conversation} from a set of {@link Conversation}s and determines the maximum
+ * cost of aligning the representative's packet length sequence with those of the other {@link Conversation}s.
+ *
+ * @author Janus Varmarken
+ */
+public class SequenceExtraction {
+
+    /**
+     * The algorithm used for computing the cost of aligning two packet length sequences.
+     */
+    private final SequenceAlignment<Integer> mAlignmentAlg;
+
+    /**
+     * Creates a {@code SequenceExtraction} backed by a {@link SequenceAlignment} whose {@link AlignmentPricer} is
+     * built from the absolute difference between two packet lengths and the constant 10.
+     * NOTE(review): the constant is presumably the gap cost; confirm against {@link AlignmentPricer}.
+     */
+    public SequenceExtraction() {
+        mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10));
+    }
+
+    /**
+     * Creates a {@code SequenceExtraction} that uses the given alignment algorithm.
+     * @param alignmentAlgorithm the {@link SequenceAlignment} used to compute alignment costs.
+     */
+    public SequenceExtraction(SequenceAlignment<Integer> alignmentAlgorithm) {
+        mAlignmentAlg = alignmentAlgorithm;
+    }
+
+    /**
+     * Gets the {@link SequenceAlignment} used to perform the sequence extraction.
+     * @return the {@link SequenceAlignment} used to perform the sequence extraction.
+     */
+    public SequenceAlignment<Integer> getAlignmentAlgorithm() {
+        return mAlignmentAlg;
+    }
+
+    /**
+     * Picks a representative {@link Conversation} among the given ones and computes the maximum cost of aligning
+     * its packet length sequence with the packet length sequence of each of the other given conversations.
+     * The representative is the first conversation of the most frequent packet sequence group; ties in frequency
+     * are broken in favor of the conversation with the most packets.
+     *
+     * @param convsForActionForHostname the conversations to extract from; must not be empty.
+     * @return an {@link ExtractedSequence} holding the representative and the maximum alignment cost.
+     * @throws IllegalArgumentException if {@code convsForActionForHostname} is empty, or if grouping it yields no
+     *         conversation to pick as the representative.
+     */
+    public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
+        if (convsForActionForHostname.isEmpty()) {
+            throw new IllegalArgumentException("Cannot extract a sequence from an empty list of conversations.");
+        }
+        // First group conversations by packet sequences.
+        // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
+        // as it can potentially result in sequences that are equal in terms of payload packets to be considered
+        // different due to differences in how they are terminated.
+        Map<String, List<Conversation>> groupedBySequence =
+                TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
+        // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
+        Conversation mostFrequentConv = null;
+        int maxFrequency = 0;
+        for (List<Conversation> group : groupedBySequence.values()) {
+            if (group.isEmpty()) {
+                // Nothing to pick a representative from (and get(0) below would fail).
+                continue;
+            }
+            // We just pick the first conversation as the representative conversation for this sequence type.
+            Conversation candidate = group.get(0);
+            boolean moreFrequent = group.size() > maxFrequency;
+            // Break ties in frequency by choosing the longest sequence.
+            boolean equallyFrequentButLonger = group.size() == maxFrequency
+                    && candidate.getPackets().size() > mostFrequentConv.getPackets().size();
+            if (moreFrequent || equallyFrequentButLonger) {
+                maxFrequency = group.size();
+                mostFrequentConv = candidate;
+            }
+        }
+        if (mostFrequentConv == null) {
+            throw new IllegalArgumentException("Grouping the conversations yielded no representative candidate.");
+        }
+        // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with
+        // each of the rest of the conversations also associated with this action and hostname.
+        int maxCost = 0;
+        final Integer[] mostFrequentConvSeq = TcpConversationUtils.getPacketLengthSequence(mostFrequentConv);
+        for (Conversation c : convsForActionForHostname) {
+            if (c == mostFrequentConv) {
+                // Don't compute distance to self.
+                continue;
+            }
+            Integer[] cSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            int alignmentCost = mAlignmentAlg.calculateAlignment(mostFrequentConvSeq, cSeq);
+            if (alignmentCost > maxCost) {
+                maxCost = alignmentCost;
+            }
+        }
+        return new ExtractedSequence(mostFrequentConv, maxCost);
+    }
+
+}