From d6b17b053971ea9cd2a8c6987d03d6ca0dad91b3 Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Tue, 4 Sep 2018 12:08:21 -0700 Subject: [PATCH] Do some refactoring to prevent code duplication; change string sequence generation/grouping to utilize Stream API. --- .../java/edu/uci/iotproject/Conversation.java | 5 +- .../main/java/edu/uci/iotproject/Main.java | 72 ++++------ .../analysis/TcpConversationUtils.java | 129 +++++------------- .../seqalignment/SequenceExtraction.java | 2 +- .../uci/iotproject/util/PcapPacketUtils.java | 24 +++- 5 files changed, 89 insertions(+), 143 deletions(-) diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Conversation.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Conversation.java index 28515e3..73d165d 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Conversation.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Conversation.java @@ -562,17 +562,16 @@ public class Conversation { CLIENT_TO_SERVER { @Override public String toCompactString() { - return "C->S"; + return "*"; } }, SERVER_TO_CLIENT { @Override public String toCompactString() { - return "S->C"; + return ""; } }; - /** * Get a compact string representation of this {@code Direction}. * @return a compact string representation of this {@code Direction}. 
diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java index 73eac01..23901a3 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java @@ -39,9 +39,9 @@ public class Main { // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------ // Paths to input and output files (consider supplying these as arguments instead) and IP of the device for // which traffic is to be extracted: - String path = "/scratch/July-2018"; // Rahmadi -// String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus - boolean verbose = false; +// String path = "/scratch/July-2018"; // Rahmadi + String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus + boolean verbose = true; final String onPairsPath = "/scratch/July-2018/on.txt"; final String offPairsPath = "/scratch/July-2018/off.txt"; @@ -60,10 +60,10 @@ public class Main { // 2b) TP-Link July 25 experiment TRUNCATED: // Only contains "true local" events, i.e., before the behavior changes to remote-like behavior. // Last included event is at July 25 10:38:11; file filtered to only include packets with arrival time <= 10:38:27. 
-// final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap"; -// final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap"; -// final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps"; -// final String deviceIp = "192.168.1.159"; + final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap"; + final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap"; + final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps"; + final String deviceIp = "192.168.1.159"; // 3) SmartThings Plug July 25 experiment // final String inputPcapFile = path + "/2018-07/stplug/stplug.wlan1.local.pcap"; @@ -138,10 +138,10 @@ public class Main { // final String deviceIp = "192.168.1.246"; // .246 == phone; .127 == Nest thermostat // 15) Alexa August 16 experiment - final String inputPcapFile = path + "/2018-08/alexa/alexa.wlan1.local.pcap"; - final String outputPcapFile = path + "/2018-08/alexa/alexa-processed.pcap"; - final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps"; - final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa +// final String inputPcapFile = path + "/2018-08/alexa/alexa.wlan1.local.pcap"; +// final String outputPcapFile = path + "/2018-08/alexa/alexa-processed.pcap"; +// final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps"; +// final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa // August 17 // final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap"; // final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap"; @@ -205,40 +205,22 @@ public class Main { Map>> ons = new HashMap<>(); // Contains all OFF events: hostname -> sequence identifier -> list of conversations with that sequence Map>> offs = new HashMap<>(); - - if (verbose) { - 
userActionsToConvsByHostname.forEach((ua, hostnameToConvs) -> { - Map>> outer = ua.getType() == Type.TOGGLE_ON ? ons : offs; - hostnameToConvs.forEach((host, convs) -> { - Map> seqsToConvs = TcpConversationUtils. - groupConversationsByPacketSequenceVerbose(convs); - outer.merge(host, seqsToConvs, (oldMap, newMap) -> { - newMap.forEach((sequence, cs) -> oldMap.merge(sequence, cs, (list1, list2) -> { - list1.addAll(list2); - return list1; - })); - return oldMap; - }); - }); - }); - } else { - userActionsToConvsByHostname.forEach((ua, hostnameToConvs) -> { - Map>> outer = ua.getType() == Type.TOGGLE_ON ? ons : offs; - hostnameToConvs.forEach((host, convs) -> { - Map> seqsToConvs = TcpConversationUtils. - groupConversationsByPacketSequence(convs); - outer.merge(host, seqsToConvs, (oldMap, newMap) -> { - newMap.forEach((sequence, cs) -> oldMap.merge(sequence, cs, (list1, list2) -> { - list1.addAll(list2); - return list1; - })); - return oldMap; - }); + userActionsToConvsByHostname.forEach((ua, hostnameToConvs) -> { + Map>> outer = ua.getType() == Type.TOGGLE_ON ? ons : offs; + hostnameToConvs.forEach((host, convs) -> { + Map> seqsToConvs = TcpConversationUtils. 
+ groupConversationsByPacketSequence(convs, verbose); + outer.merge(host, seqsToConvs, (oldMap, newMap) -> { + newMap.forEach((sequence, cs) -> oldMap.merge(sequence, cs, (list1, list2) -> { + list1.addAll(list2); + return list1; + })); + return oldMap; }); }); - } - + }); + /* // Print out all the pairs into a file for ON events File fileOnEvents = new File(onPairsPath); PrintWriter pwOn = null; @@ -324,14 +306,14 @@ public class Main { } } pwOff.close(); - + */ // ================================================================================================ // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>> // // Currently need to know relevant hostname in advance :( -// String hostname = "events.tplinkra.com"; - String hostname = "rfe-us-west-1.dch.dlink.com"; + String hostname = "events.tplinkra.com"; +// String hostname = "rfe-us-west-1.dch.dlink.com"; // Conversations with 'hostname' for ON events. List onsForHostname = new ArrayList<>(); // Conversations with 'hostname' for OFF events. 
diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java index dc38358..470b15f 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java @@ -2,7 +2,6 @@ package edu.uci.iotproject.analysis; import edu.uci.iotproject.Conversation; import edu.uci.iotproject.DnsMap; -import edu.uci.iotproject.FinAckPair; import edu.uci.iotproject.util.PcapPacketUtils; import org.pcap4j.core.PcapPacket; import org.pcap4j.packet.IpV4Packet; @@ -10,6 +9,9 @@ import org.pcap4j.packet.TcpPacket; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static edu.uci.iotproject.util.PcapPacketUtils.*; /** * Utility functions for analyzing and structuring (sets of) {@link Conversation}s. @@ -139,108 +141,22 @@ public class TcpConversationUtils { * (i.e., the set of packets returned by {@link Conversation#getPackets()}) separated by a delimiter of any * {@link Conversation} pointed to by that key. In other words, what the {@link Conversation}s {@code cs} pointed to * by the key {@code s} have in common is that they all contain exactly the same number of payload packets and - * these payload packets are identical across all {@code Conversation}s in {@code convs} in terms of packet + * these payload packets are identical across all {@code Conversation}s in {@code cs} in terms of packet * length and packet order. For example, if the key is "152 440 550", this means that every individual * {@code Conversation} in the list of {@code Conversation}s pointed to by that key contain exactly three payload * packet of lengths 152, 440, and 550, and these three packets are ordered in the order prescribed by the key. 
- * This verbose version prints out the SYNACK, SYN, FINACK, FIN, RST, etc. packets. * * @param conversations The collection of {@code Conversation}s to group by packet sequence. + * @param verbose If set to {@code true}, the grouping (and therefore the key) will also include SYN/SYNACK, + * FIN/FINACK, RST packets, and each payload-carrying packet will have an indication of the direction + * of the packet prepended. * @return a {@link Map} from {@link String} to {@link List} of {@link Conversation}s such that each key is the * concatenation of the packet lengths of all payload packets (i.e., the set of packets returned by * {@link Conversation#getPackets()}) separated by a delimiter of any {@link Conversation} pointed to * by that key. */ - public static Map> groupConversationsByPacketSequenceVerbose(Collection conversations) { - Map> result = new HashMap<>(); - for (Conversation conv : conversations) { - if (conv.getPackets().size() == 0) { - // Skip conversations with no payload packets. - continue; - } - StringBuilder sb = new StringBuilder(); - // Add SYN and SYNACK at front of sequence to indicate if we saw the handshake or if recording started in - // the middle of the conversation. - for (PcapPacket syn : conv.getSynPackets()) { - TcpPacket.TcpHeader tcpHeader = syn.get(TcpPacket.class).getHeader(); - if (tcpHeader.getSyn() && tcpHeader.getAck()) { - // Only append a space if there's preceding content. - appendSpaceIfNotEmpty(sb); - sb.append("SYNACK"); - } else if (tcpHeader.getSyn()) { - if (sb.length() != 0) { - // If present in the trace, the client's SYN should be at the front of the list, so it should be - // appended as the first item. - throw new AssertionError("StringBuilder had content when appending SYN"); - } - sb.append("SYN"); - } - } - // Then append the length of all application data packets. - for (PcapPacket pp : conv.getPackets()) { - // Only append a space if there's preceding content. 
- appendSpaceIfNotEmpty(sb); - sb.append("(" + conv.getDirection(pp).toCompactString() + "_" + pp.length() + ")"); - } - // Then append the logged FINs to indicate if conversation was terminated gracefully. - for (FinAckPair fap : conv.getFinAckPairs()) { - appendSpaceIfNotEmpty(sb); - sb.append(fap.isAcknowledged() ? "FINACK" : "FIN"); - } - // Then append the logged RSTs to indicate if conversation was terminated abruptly. - for (PcapPacket pp : conv.getRstPackets()) { - appendSpaceIfNotEmpty(sb); - sb.append("RST"); - } - List oneItemList = new ArrayList<>(); - oneItemList.add(conv); - result.merge(sb.toString(), oneItemList, (oldList, newList) -> { - oldList.addAll(newList); - return oldList; - }); - } - return result; - } - - /** - * Given a {@link Collection} of {@link Conversation}s, builds a {@link Map} from {@link String} to {@link List} - * of {@link Conversation}s such that each key is the concatenation of the packet lengths of all payload packets - * (i.e., the set of packets returned by {@link Conversation#getPackets()}) separated by a delimiter of any - * {@link Conversation} pointed to by that key. In other words, what the {@link Conversation}s {@code cs} pointed to - * by the key {@code s} have in common is that they all contain exactly the same number of payload packets and - * these payload packets are identical across all {@code Conversation}s in {@code convs} in terms of packet - * length and packet order. For example, if the key is "152 440 550", this means that every individual - * {@code Conversation} in the list of {@code Conversation}s pointed to by that key contain exactly three payload - * packet of lengths 152, 440, and 550, and these three packets are ordered in the order prescribed by the key. - * - * @param conversations The collection of {@code Conversation}s to group by packet sequence. 
- * @return a {@link Map} from {@link String} to {@link List} of {@link Conversation}s such that each key is the - * concatenation of the packet lengths of all payload packets (i.e., the set of packets returned by - * {@link Conversation#getPackets()}) separated by a delimiter of any {@link Conversation} pointed to - * by that key. - */ - public static Map> groupConversationsByPacketSequence(Collection conversations) { - Map> result = new HashMap<>(); - for (Conversation conv : conversations) { - if (conv.getPackets().size() == 0) { - // Skip conversations with no payload packets. - continue; - } - StringBuilder sb = new StringBuilder(); - // Then append the length of all application data packets. - for (PcapPacket pp : conv.getPackets()) { - // Only append a space if there's preceding content. - appendSpaceIfNotEmpty(sb); - sb.append(pp.length()); - } - List oneItemList = new ArrayList<>(); - oneItemList.add(conv); - result.merge(sb.toString(), oneItemList, (oldList, newList) -> { - oldList.addAll(newList); - return oldList; - }); - } - return result; + public static Map> groupConversationsByPacketSequence(Collection conversations, boolean verbose) { + return conversations.stream().collect(Collectors.groupingBy(c -> toSequenceString(c, verbose))); } public static Map> groupConversationsByTlsApplicationDataPacketSequence(Collection conversations) { @@ -340,6 +256,33 @@ public class TcpConversationUtils { return packets.stream().map(pkt -> pkt.getOriginalLength()).toArray(Integer[]::new); } + /** + * Builds a string representation of the sequence of packets exchanged as part of {@code c}. + * @param c The {@link Conversation} for which a string representation of the packet sequence is to be constructed. + * @param verbose {@code true} if set to true, the returned sequence string will also include SYN/SYNACK, + * FIN/FINACK, RST packets, as well as an indication of the direction of payload-carrying packets. 
+ * @return a string representation of the sequence of packets exchanged as part of {@code c}. + */ + private static String toSequenceString(Conversation c, boolean verbose) { + // Payload-carrying packets are always included, but only prepend direction if verbose output is chosen. + Stream s = c.getPackets().stream().map(p -> verbose ? c.getDirection(p).toCompactString() + p.getOriginalLength() : Integer.toString(p.getOriginalLength())); + if (verbose) { + // In the verbose case, we also print SYN, FIN and RST packets. + // Convert the SYN packets to a string representation and prepend them in front of the payload packets. + s = Stream.concat(c.getSynPackets().stream().map(p -> isSyn(p) && isAck(p) ? "SYNACK" : "SYN"), s); + // Convert the FIN packets to a string representation and append them after the payload packets. + s = Stream.concat(s, c.getFinAckPairs().stream().map(f -> f.isAcknowledged() ? "FINACK" : "FIN")); + // Convert the RST packets to a string representation and append at the end. + s = Stream.concat(s, c.getRstPackets().stream().map(r -> "RST")); + } + /* + * Note: the collector internally uses a StringBuilder, which is more efficient than simply doing string + * concatenation as in the following example: + * s.reduce("", (s1, s2) -> s1.length() == 0 ? s2 : s1 + " " + s2); + * (above code is O(N^2) where N is the number of characters) + */ + return s.collect(Collectors.joining(" ")); + } /** * Appends a space to {@code sb} iff {@code sb} already contains some content. 
diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java index e208501..c611968 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java @@ -81,7 +81,7 @@ public class SequenceExtraction { // as it can potentially result in sequences that are equal in terms of payload packets to be considered // different due to differences in how they are terminated. Map> groupedBySequence = - TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname); + TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname, false); // Then get a hold of one of the conversations that gave rise to the most frequent sequence. Conversation mostFrequentConv = null; diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PcapPacketUtils.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PcapPacketUtils.java index 4e57841..311fc66 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PcapPacketUtils.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PcapPacketUtils.java @@ -7,7 +7,7 @@ import org.pcap4j.packet.TcpPacket; import java.util.Objects; /** - * Utility methods for inspecting {@link PcapPacket} properties. Currently not used. + * Utility methods for inspecting {@link PcapPacket} properties. * * @author Janus Varmarken {@literal } * @author Rahmadi Trimananda {@literal } @@ -46,4 +46,26 @@ public final class PcapPacketUtils { return ipDst.equals(ip) && dstPort == port; } + /** + * Checks if {@code packet} wraps a TCP packet that has the SYN flag set. 
+ * @param packet A {@link PcapPacket} that is suspected to contain a {@link TcpPacket} for which the SYN flag is set. + * @return {@code true} iff {@code packet} contains a {@code TcpPacket} for which the SYN flag is set, + * {@code false} otherwise. + */ + public static boolean isSyn(PcapPacket packet) { + TcpPacket tcp = packet.get(TcpPacket.class); + return tcp != null && tcp.getHeader().getSyn(); + } + + /** + * Checks if {@code packet} wraps a TCP packet that has the ACK flag set. + * @param packet A {@link PcapPacket} that is suspected to contain a {@link TcpPacket} for which the ACK flag is set. + * @return {@code true} iff {@code packet} contains a {@code TcpPacket} for which the ACK flag is set, + * {@code false} otherwise. + */ + public static boolean isAck(PcapPacket packet) { + TcpPacket tcp = packet.get(TcpPacket.class); + return tcp != null && tcp.getHeader().getAck(); + } + } -- 2.34.1