From 9dbae3199586a0da2205ecd943b21bf0bef8b3e0 Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Tue, 21 Aug 2018 15:11:08 -0700 Subject: [PATCH] SequenceExtraction.java: finish up naive implementation of extract(...) and move to seqalignment subpackage. Main.java: add code for exploring the number of false positives for sequence alignment. TcpConversationUtils.java: move implementation of getPacketLengthSequence(Conversation) to this class from SequenceExtraction as it is a generally useful util method. --- .../main/java/edu/uci/iotproject/Main.java | 61 +++++++-- .../uci/iotproject/SequenceExtraction.java | 76 ----------- .../analysis/TcpConversationUtils.java | 15 +++ .../seqalignment/ExtractedSequence.java | 37 ++++++ .../seqalignment/SequenceExtraction.java | 120 ++++++++++++++++++ 5 files changed, 223 insertions(+), 86 deletions(-) delete mode 100644 Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java create mode 100644 Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java create mode 100644 Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java index 2f5a74e..f19b7b2 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java @@ -6,6 +6,9 @@ import edu.uci.iotproject.analysis.TcpConversationUtils; import edu.uci.iotproject.analysis.TrafficLabeler; import edu.uci.iotproject.analysis.TriggerTrafficExtractor; import edu.uci.iotproject.analysis.UserAction; +import edu.uci.iotproject.comparison.seqalignment.ExtractedSequence; +import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment; +import 
edu.uci.iotproject.comparison.seqalignment.SequenceExtraction; import edu.uci.iotproject.io.TriggerTimesFileReader; import org.pcap4j.core.*; import org.pcap4j.packet.namednumber.DataLinkType; @@ -34,8 +37,8 @@ public class Main { // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------ // Paths to input and output files (consider supplying these as arguments instead) and IP of the device for // which traffic is to be extracted: - String path = "/scratch/July-2018"; // Rahmadi - //String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus +// String path = "/scratch/July-2018"; // Rahmadi + String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus // 1) D-Link July 26 experiment // final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap"; @@ -52,10 +55,10 @@ public class Main { // 2b) TP-Link July 25 experiment TRUNCATED: // Only contains "true local" events, i.e., before the behavior changes to remote-like behavior. // Last included event is at July 25 10:38:11; file filtered to only include packets with arrival time <= 10:38:27. 
-// final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap"; -// final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap"; -// final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps"; -// final String deviceIp = "192.168.1.159"; + final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap"; + final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap"; + final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps"; + final String deviceIp = "192.168.1.159"; // 3) SmartThings Plug July 25 experiment // final String inputPcapFile = path + "/2018-07/stplug/stplug.wlan1.local.pcap"; @@ -135,10 +138,10 @@ public class Main { // final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps"; // final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa // August 17 - final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap"; - final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap"; - final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps"; - final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa +// final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap"; +// final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap"; +// final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps"; +// final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa TriggerTimesFileReader ttfr = new TriggerTimesFileReader(); List triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false); @@ -212,7 +215,45 @@ public class Main { }); }); + // ================================================================================================ + // <<< Some work-in-progress/explorative code that extracts a 
"representative" sequence >>> + // + // Currently need to know relevant hostname in advance :( + String hostname = "events.tplinkra.com"; + // Conversations with 'hostname' for ON events. + List onsForHostname = new ArrayList<>(); + // Conversations with 'hostname' for OFF events. + List offsForHostname = new ArrayList<>(); + // "Unwrap" sequence groupings in ons/offs maps. + ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v)); + offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v)); + // Extract representative sequence for ON and OFF by providing the list of conversations with + // 'hostname' observed for each event type (the training data). + SequenceExtraction seqExtraction = new SequenceExtraction(); + ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname); + ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname); + // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly + // labeled). 
+ int onsLabeledAsOff = 0; + Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence()); + Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence()); + SequenceAlignment seqAlg = seqExtraction.getAlignmentAlgorithm(); + for (Conversation c : onsForHostname) { + Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c); + if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) { + onsLabeledAsOff++; + } + } + int offsLabeledAsOn = 0; + for (Conversation c : offsForHostname) { + Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c); + if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) { + offsLabeledAsOn++; + } + } System.out.println(""); + // ================================================================================================ + // ------------------------------------------------------------------------------------------------------------- // ------------------------------------------------------------------------------------------------------------- diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java deleted file mode 100644 index bb1e210..0000000 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/SequenceExtraction.java +++ /dev/null @@ -1,76 +0,0 @@ -package edu.uci.iotproject; - -import edu.uci.iotproject.comparison.seqalignment.AlignmentPricer; -import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment; -import org.pcap4j.core.PcapPacket; - -import java.util.List; -import java.util.Map; - -/** - * TODO add class documentation. 
- * - * @author Janus Varmarken - */ -public class SequenceExtraction { - - - private final SequenceAlignment mAlignmentAlg; - - - public SequenceExtraction() { - mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10)); - } - - - public SequenceExtraction(SequenceAlignment alignmentAlgorithm) { - mAlignmentAlg = alignmentAlgorithm; - } - - // Initial -// /** -// * -// * @param convsForAction A set of {@link Conversation}s known to be associated with a single type of user action. -// */ -// public void extract(List convsForAction) { -// int maxDifference = 0; -// -// for (int i = 0; i < convsForAction.size(); i++) { -// for (int j = i+1; j < convsForAction.size(); i++) { -// Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i)); -// Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j)); -// int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2); -// if (alignmentCost > maxDifference) { -// maxDifference = alignmentCost; -// } -// } -// } -// -// } - - -// public void extract(Map> hostnameToConvs) { -// int maxDifference = 0; -// -// for (int i = 0; i < convsForAction.size(); i++) { -// for (int j = i+1; j < convsForAction.size(); i++) { -// Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i)); -// Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j)); -// int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2); -// if (alignmentCost > maxDifference) { -// maxDifference = alignmentCost; -// } -// } -// } -// -// } - - private Integer[] getPacketLengthSequence(Conversation c) { - List packets = c.getPackets(); - Integer[] packetLengthSequence = new Integer[packets.size()]; - for (int i = 0; i < packetLengthSequence.length; i++) { - packetLengthSequence[i] = packets.get(i).length(); - } - return packetLengthSequence; - } -} diff --git 
a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java index a27e5fc..2b172bc 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java @@ -255,6 +255,21 @@ public class TcpConversationUtils { return result; } + /** + * Given a {@link Conversation}, extract its packet length sequence. + * @param c The {@link Conversation} from which a packet length sequence is to be extracted. + * @return An {@code Integer[]} that holds the packet lengths of all payload-carrying packets in {@code c}. The + * packet lengths in the returned array are ordered by packet timestamp. + */ + public static Integer[] getPacketLengthSequence(Conversation c) { + List packets = c.getPackets(); + Integer[] packetLengthSequence = new Integer[packets.size()]; + for (int i = 0; i < packetLengthSequence.length; i++) { + packetLengthSequence[i] = packets.get(i).getOriginalLength(); + } + return packetLengthSequence; + } + /** * Appends a space to {@code sb} iff {@code sb} already contains some content. * @param sb A {@link StringBuilder} that should have a space appended iff it is not empty. diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java new file mode 100644 index 0000000..423e3c8 --- /dev/null +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java @@ -0,0 +1,37 @@ +package edu.uci.iotproject.comparison.seqalignment; + +import edu.uci.iotproject.Conversation; + +/** + * TODO add class documentation. 
+ * + * @author Janus Varmarken + */ +public class ExtractedSequence { + + private final Conversation mRepresentativeSequence; + + private final int mMaxAlignmentCost; + + private final String mSequenceString; + + public ExtractedSequence(Conversation sequence, int maxAlignmentCost) { + mRepresentativeSequence = sequence; + mMaxAlignmentCost = maxAlignmentCost; + StringBuilder sb = new StringBuilder(); + sequence.getPackets().forEach(p -> { + if (sb.length() != 0) sb.append(" "); + sb.append(p.getOriginalLength()); + }); + mSequenceString = sb.toString(); + } + + public Conversation getRepresentativeSequence() { + return mRepresentativeSequence; + } + + public int getMaxAlignmentCost() { + return mMaxAlignmentCost; + } + +} diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java new file mode 100644 index 0000000..8003670 --- /dev/null +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java @@ -0,0 +1,120 @@ +package edu.uci.iotproject.comparison.seqalignment; + +import edu.uci.iotproject.Conversation; +import edu.uci.iotproject.analysis.TcpConversationUtils; + +import java.util.List; +import java.util.Map; + +/** + * TODO add class documentation. + * + * @author Janus Varmarken + */ +public class SequenceExtraction { + + + private final SequenceAlignment mAlignmentAlg; + + + public SequenceExtraction() { + mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10)); + } + + + public SequenceExtraction(SequenceAlignment alignmentAlgorithm) { + mAlignmentAlg = alignmentAlgorithm; + } + + /** + * Gets the {@link SequenceAlignment} used to perform the sequence extraction. + * @return the {@link SequenceAlignment} used to perform the sequence extraction. 
+ */ + public SequenceAlignment getAlignmentAlgorithm() { + return mAlignmentAlg; + } + + // Initial +// /** +// * +// * @param convsForAction A set of {@link Conversation}s known to be associated with a single type of user action. +// */ +// public void extract(List convsForAction) { +// int maxDifference = 0; +// +// for (int i = 0; i < convsForAction.size(); i++) { +// for (int j = i+1; j < convsForAction.size(); i++) { +// Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i)); +// Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j)); +// int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2); +// if (alignmentCost > maxDifference) { +// maxDifference = alignmentCost; +// } +// } +// } +// +// } + + +// public void extract(Map> hostnameToConvs) { +// int maxDifference = 0; +// +// for (int i = 0; i < convsForAction.size(); i++) { +// for (int j = i+1; j < convsForAction.size(); i++) { +// Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i)); +// Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j)); +// int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2); +// if (alignmentCost > maxDifference) { +// maxDifference = alignmentCost; +// } +// } +// } +// +// } + + + public ExtractedSequence extract(List convsForActionForHostname) { + // First group conversations by packet sequences. + // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here + // as it can potentially result in sequences that are equal in terms of payload packets to be considered + // different due to differences in how they are terminated. + Map> groupedBySequence = + TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname); + // Then get a hold of one of the conversations that gave rise to the most frequent sequence. 
+ Conversation mostFrequentConv = null; + int maxFrequency = 0; + for (Map.Entry> seqMapEntry : groupedBySequence.entrySet()) { + if (seqMapEntry.getValue().size() > maxFrequency) { + // Found a more frequent sequence + maxFrequency = seqMapEntry.getValue().size(); + // We just pick the first conversation as the representative conversation for this sequence type. + mostFrequentConv = seqMapEntry.getValue().get(0); + } else if (seqMapEntry.getValue().size() == maxFrequency) { + // This sequence has the same frequency as the max frequency seen so far. + // Break ties by choosing the longest sequence. + // First get an arbitrary representative of the currently examined sequence; we just pick the first. + Conversation c = seqMapEntry.getValue().get(0); + mostFrequentConv = c.getPackets().size() > mostFrequentConv.getPackets().size() ? c : mostFrequentConv; + } + } + // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with + // each of the rest of the conversations also associated with this action and hostname. + int maxCost = 0; + final Integer[] mostFrequentConvSeq = TcpConversationUtils.getPacketLengthSequence(mostFrequentConv); + for (Conversation c : convsForActionForHostname) { + if (c == mostFrequentConv) { + // Don't compute distance to self. + continue; + } + Integer[] cSeq = TcpConversationUtils.getPacketLengthSequence(c); + int alignmentCost = mAlignmentAlg.calculateAlignment(mostFrequentConvSeq, cSeq); + if (alignmentCost > maxCost) { + maxCost = alignmentCost; + } + } + return new ExtractedSequence(mostFrequentConv, maxCost); + } + + + +} -- 2.34.1