From ac0111289c4afdf76a921deef82db7a90cabd9a0 Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Mon, 17 Sep 2018 15:39:40 -0700 Subject: [PATCH] Finished java-based clustering implementation (note: had to move Clusterable implementation to PcapPacketPair and get rid of PcapPacketPairWrapper). --- .../main/java/edu/uci/iotproject/Main.java | 86 +++++++++++-- .../iotproject/analysis/PcapPacketPair.java | 114 ++++++++++++++++- .../clustering/PcapPacketPairWrapper.java | 121 ------------------ .../edu/uci/iotproject/util/PrintUtils.java | 13 ++ 4 files changed, 200 insertions(+), 134 deletions(-) delete mode 100644 Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/clustering/PcapPacketPairWrapper.java diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java index dcabbf5..8c83363 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java @@ -8,6 +8,8 @@ import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment; import edu.uci.iotproject.comparison.seqalignment.SequenceExtraction; import edu.uci.iotproject.io.TriggerTimesFileReader; import edu.uci.iotproject.util.PrintUtils; +import org.apache.commons.math3.stat.clustering.Cluster; +import org.apache.commons.math3.stat.clustering.DBSCANClusterer; import org.pcap4j.core.*; import org.pcap4j.packet.namednumber.DataLinkType; @@ -18,6 +20,8 @@ import java.net.UnknownHostException; import java.time.Instant; import java.util.*; import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * This is a system that reads PCAP files to compare @@ -37,8 +41,8 @@ public class Main { // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------ // Paths to input and output files (consider supplying these as arguments instead) and IP of the device for // which traffic is to be extracted: - String path = "/scratch/July-2018"; // Rahmadi -// String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus +// String path = "/scratch/July-2018"; // Rahmadi + String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus boolean verbose = true; final String onPairsPath = "/scratch/July-2018/on.txt"; final String offPairsPath = "/scratch/July-2018/off.txt"; @@ -88,10 +92,10 @@ public class Main { // final String deviceIp = "192.168.1.140"; // .246 == phone; .140 == TP-Link bulb // 7) Kwikset Doorlock August 6 experiment - final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock.wlan1.local.pcap"; - final String outputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-processed.pcap"; - final String triggerTimesFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-aug-6-2018.timestamps"; - final String deviceIp = "192.168.1.246"; // .246 == phone; .142 == SmartThings Hub (note: use eth0 capture for this!) +// final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock.wlan1.local.pcap"; +// final String outputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-processed.pcap"; +// final String triggerTimesFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-aug-6-2018.timestamps"; +// final String deviceIp = "192.168.1.246"; // .246 == phone; .142 == SmartThings Hub (note: use eth0 capture for this!) // 8) Hue Bulb August 7 experiment // final String inputPcapFile = path + "/2018-08/hue-bulb/hue-bulb.wlan1.local.pcap"; @@ -124,10 +128,10 @@ public class Main { // final String deviceIp = "192.168.1.246"; // .246 == phone; .229 == sprinkler // // 13) DLink siren August 14 experiment -// final String inputPcapFile = path + "/2018-08/dlink-siren/dlink-siren.wlan1.local.pcap"; -// final String outputPcapFile = path + "/2018-08/dlink-siren/dlink-siren-processed.pcap"; -// final String triggerTimesFile = path + "/2018-08/dlink-siren/dlink-siren-aug-14-2018.timestamps"; -// final String deviceIp = "192.168.1.183"; // .246 == phone; .183 == siren + final String inputPcapFile = path + "/2018-08/dlink-siren/dlink-siren.wlan1.local.pcap"; + final String outputPcapFile = path + "/2018-08/dlink-siren/dlink-siren-processed.pcap"; + final String triggerTimesFile = path + "/2018-08/dlink-siren/dlink-siren-aug-14-2018.timestamps"; + final String deviceIp = "192.168.1.183"; // .246 == phone; .183 == siren // 14) Nest thermostat August 15 experiment // final String inputPcapFile = path + "/2018-08/nest/nest.wlan1.local.pcap"; @@ -199,6 +203,12 @@ public class Main { + /* + * NOTE: no need to generate these more complex on/off maps that also contain mappings from hostname and + * sequence identifiers as we do not care about hostnames and sequences during clustering. + * We can simply use the UserAction->List map to generate ON/OFF groupings of conversations. + */ + /* // Contains all ON events: hostname -> sequence identifier -> list of conversations with that sequence Map>> ons = new HashMap<>(); // Contains all OFF events: hostname -> sequence identifier -> list of conversations with that sequence @@ -217,8 +227,60 @@ public class Main { }); }); }); + */ + + // ================================================ CLUSTERING ================================================ + // Note: no need to use the more convoluted on/off maps; can simply use the UserAction->List map + // when don't care about hostnames and sequences (see comment earlier). + List onConversations = userActionToConversations.entrySet().stream(). + filter(e -> e.getKey().getType() == Type.TOGGLE_ON). // drop all OFF events from stream + map(e -> e.getValue()). // no longer interested in the UserActions + flatMap(List::stream). // flatten List> to a List + collect(Collectors.toList()); + List offConversations = userActionToConversations.entrySet().stream(). + filter(e -> e.getKey().getType() == Type.TOGGLE_OFF). + map(e -> e.getValue()). + flatMap(List::stream). + collect(Collectors.toList()); + List onPairs = onConversations.stream(). + map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) : + TcpConversationUtils.extractPacketPairs(c)). + flatMap(List::stream). // flatten List> to List<> + collect(Collectors.toList()); + List offPairs = offConversations.stream(). + map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) : + TcpConversationUtils.extractPacketPairs(c)). + flatMap(List::stream). // flatten List> to List<> + collect(Collectors.toList()); + // Note: need to update the DnsMap of all PcapPacketPairs if we want to use the IP/hostname-sensitive distance. + Stream.concat(Stream.of(onPairs), Stream.of(offPairs)).flatMap(List::stream).forEach(p -> p.setDnsMap(dnsMap)); + // Perform clustering on conversation logged as part of all ON events. + DBSCANClusterer onClusterer = new DBSCANClusterer<>(10.0, 5); + List> onClusters = onClusterer.cluster(onPairs); + // Perform clustering on conversation logged as part of all OFF events. + DBSCANClusterer offClusterer = new DBSCANClusterer<>(10.0, 5); + List> offClusters = offClusterer.cluster(offPairs); + // Output clusters + System.out.println("========================================"); + System.out.println(" Clustering results for ON "); + System.out.println(" Number of clusters: " + onClusters.size()); + int count = 0; + for (Cluster c : onClusters) { + System.out.println(String.format("<<< Cluster #%02d (%03d points) >>>", ++count, c.getPoints().size())); + System.out.print(PrintUtils.toSummaryString(c)); + } + System.out.println("========================================"); + System.out.println(" Clustering results for OFF "); + System.out.println(" Number of clusters: " + offClusters.size()); + count = 0; + for (Cluster c : offClusters) { + System.out.println(String.format("<<< Cluster #%03d (%06d points) >>>", ++count, c.getPoints().size())); + System.out.print(PrintUtils.toSummaryString(c)); + } + System.out.println("========================================"); + // ============================================================================================================ - + /* System.out.println("==== ON ===="); // Print out all the pairs into a file for ON events File fileOnEvents = new File(onPairsPath); @@ -288,7 +350,7 @@ public class Main { } } pwOff.close(); - + */ // // ================================================================================================ // // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>> diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/PcapPacketPair.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/PcapPacketPair.java index a0918e4..2d6e9aa 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/PcapPacketPair.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/PcapPacketPair.java @@ -1,11 +1,18 @@ package edu.uci.iotproject.analysis; +import edu.uci.iotproject.DnsMap; import edu.uci.iotproject.util.PcapPacketUtils; +import org.apache.commons.math3.stat.clustering.Clusterable; import org.pcap4j.core.PcapPacket; import java.net.InetAddress; import java.net.UnknownHostException; +import java.util.Collection; import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import static edu.uci.iotproject.util.PcapPacketUtils.getSourceIp; /** *

@@ -17,12 +24,27 @@ import java.util.Optional; * @author Janus Varmarken {@literal } * @author Rahmadi Trimananda {@literal } */ -public class PcapPacketPair { +public class PcapPacketPair implements Clusterable { + + /** + * If {@code true}, {@link #distanceFrom(PcapPacketPair)} will only consider if the sources of the two packets in + * the {@link PcapPacketPair}s being compared match in terms of whether the IP is a local or a remote IP. It will + * not check if the IPs/hostnames are actually the same. Set to {@code false} to make the comparison more + * strict, i.e., to enforce the requirement that the respective IPs (or hostnames) in the packets of the two + * {@link PcapPacketPair}s must be identical. + */ + private static final boolean SIMPLIFIED_SOURCE_COMPARISON = true; private final PcapPacket mFirst; private final Optional mSecond; + /** + * IP to hostname mappings. + * Allows for grouping packets with different source IPs that map to the same hostname into one cluster. + */ + private DnsMap mDnsMap; // TODO implement and invoke setter + public PcapPacketPair(PcapPacket first, PcapPacket second) { mFirst = first; mSecond = Optional.ofNullable(second); @@ -60,6 +82,22 @@ public class PcapPacketPair { } } + /** + * Get the {@link DnsMap} that is queried for hostnames mappings when performing IP/hostname-sensitive clustering. + * @return the {@link DnsMap} that is queried for hostnames mappings when performing IP/hostname-sensitive clustering. + */ + public DnsMap getDnsMap() { + return mDnsMap; + } + + /** + * Set the {@link DnsMap} to be queried for hostnames mappings when performing IP/hostname-sensitive clustering. + * @param dnsMap a {@code DnsMap} to be queried for hostnames mappings when performing IP/hostname-sensitive clustering. + */ + public void setDnsMap(final DnsMap dnsMap) { + mDnsMap = dnsMap; + } + @Override public String toString() { return String.format("%d, %s", @@ -67,4 +105,78 @@ public class PcapPacketPair { getSecond().map(pkt -> Integer.toString(pkt.getOriginalLength())).orElse("null")); } + // ================================================================================================================= + // Begin implementation of org.apache.commons.math3.stat.clustering.Clusterable interface + @Override + public double distanceFrom(PcapPacketPair that) { + if (SIMPLIFIED_SOURCE_COMPARISON) { + // Direction of packets in terms of client-to-server or server-to-client must match, but we don't care about + // IPs and hostnames + if (this.isFirstClient() != that.isFirstClient() || this.isSecondClient() != that.isSecondClient()) { + // Distance is maximal if mismatch in direction of packets + return Double.MAX_VALUE; + } + } else { + // Strict mode enabled: IPs/hostnames must match! + // Extract src ips of both packets of each pair. + String thisSrc1 = getSourceIp(this.getFirst()); + String thisSrc2 = this.getSecond().map(pp -> getSourceIp(pp)).orElse(""); + String thatSrc1 = getSourceIp(that.getFirst()); + String thatSrc2 = that.getSecond().map(pp -> getSourceIp(pp)).orElse(""); + + // Replace IPs with hostnames if possible. + thisSrc1 = mapToHostname(thisSrc1); + thisSrc2 = mapToHostname(thisSrc2); + thatSrc1 = mapToHostname(thatSrc1); + thatSrc2 = mapToHostname(thatSrc2); + + if(!thisSrc1.equals(thatSrc1) || !thisSrc2.equals(thatSrc2)) { + // Distance is maximal if sources differ. + return Double.MAX_VALUE; + } + } + + // If the sources match, the distance is the Euclidean distance between each pair of packet lengths. + int thisLen1 = this.getFirst().getOriginalLength(); + // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done. + int thisLen2 = this.getSecond().map(pp -> pp.getOriginalLength()).orElse(0); + int thatLen1 = that.getFirst().getOriginalLength(); + // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done. + int thatLen2 = that.getSecond().map(pp -> pp.getOriginalLength()).orElse(0); + return Math.sqrt( + Math.pow(thisLen1 - thatLen1, 2) + + Math.pow(thisLen2 - thatLen2, 2) + ); + } + + @Override + public PcapPacketPair centroidOf(Collection p) { + // No notion of centroid in DBSCAN + throw new UnsupportedOperationException("Not implemented; no notion of a centroid in DBSCAN."); + } + // End implementation of org.apache.commons.math3.stat.clustering.Clusterable interface + // ================================================================================================================= + + private String mapToHostname(String ip) { + Set hostnames = mDnsMap.getHostnamesForIp(ip); + if (hostnames != null && hostnames.size() > 0) { + // append hostnames back-to-back separated by a delimiter if more than one item in set + // note: use sorted() to ensure that output remains consistent (as Set has no internal ordering of elements) + String result = hostnames.stream().sorted().collect(Collectors.joining(" ")); + if (hostnames.size() > 1) { + // One IP can map to multiple hostnames, although that is rare. For now just raise a warning. + String warningStr = String.format( + "%s.mapToHostname(): encountered an IP (%s) that maps to multiple hostnames (%s)", + getClass().getSimpleName(), ip, result); + System.err.println(warningStr); + } + return result; + } + // If unable to map to a hostname, return ip for ease of use; caller can overwrite input value, defaulting to + // the original value if no mapping is found: + // String src = ""; + // src = mapToHostname(src); // src is now either a hostname or the original ip. + return ip; + } + } diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/clustering/PcapPacketPairWrapper.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/clustering/PcapPacketPairWrapper.java deleted file mode 100644 index 3d33e43..0000000 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/clustering/PcapPacketPairWrapper.java +++ /dev/null @@ -1,121 +0,0 @@ -package edu.uci.iotproject.analysis.clustering; - -import edu.uci.iotproject.DnsMap; -import edu.uci.iotproject.analysis.PcapPacketPair; -import org.apache.commons.math3.stat.clustering.Clusterable; - -import java.util.Collection; -import java.util.Set; -import java.util.stream.Collectors; - -import static edu.uci.iotproject.util.PcapPacketUtils.getSourceIp; - -/** - * A wrapper for a {@link PcapPacketPair}, allowing it to be clustered using - * {@link org.apache.commons.math3.stat.clustering.DBSCANClusterer}. Specifically, this wrapper implements - * {@link org.apache.commons.math3.stat.clustering.Clusterable}, so that the interface of {@link PcapPacketPair} - * is not cluttered up by this helper method of the clustering API. - * - * @author Janus Varmarken {@literal } - * @author Rahmadi Trimananda {@literal } - */ -public class PcapPacketPairWrapper implements Clusterable { - - /** - * If {@code true}, {@link #distanceFrom(PcapPacketPair)} will only consider if the sources of the two packets in - * the {@link PcapPacketPair}s being compared match in terms of whether the IP is a local or a remote IP. It will - * not check if the IPs/hostnames are actually the same. Set to {@code false} to make the comparison more - * strict, i.e., to enforce the requirement that the respective IPs (or hostnames) in the packets of the two - * {@link PcapPacketPair}s must be identical. - */ - private static final boolean SIMPLIFIED_SOURCE_COMPARISON = true; - - /** - * The wrapped {@link PcapPacketPair}. - */ - private final PcapPacketPair mPktPair; - - /** - * IP to hostname mappings. - * Allows for grouping packets with different source IPs that map to the same hostname into one cluster. - */ - private final DnsMap mDnsMap; - - public PcapPacketPairWrapper(PcapPacketPair wrappedObject, DnsMap ipHostnameMap) { - mPktPair = wrappedObject; - mDnsMap = ipHostnameMap; - } - - @Override - public double distanceFrom(PcapPacketPair that) { - if (SIMPLIFIED_SOURCE_COMPARISON) { - // Direction of packets in terms of client-to-server or server-to-client must match, but we don't care about - // IPs and hostnames - if (mPktPair.isFirstClient() != that.isFirstClient() || - mPktPair.isSecondClient() != that.isSecondClient()) { - // Distance is maximal if mismatch in direction of packets - return Double.MAX_VALUE; - } - } else { - // Strict mode enabled: IPs/hostnames must match! - // Extract src ips of both packets of each pair. - String thisSrc1 = getSourceIp(mPktPair.getFirst()); - String thisSrc2 = mPktPair.getSecond().map(pp -> getSourceIp(pp)).orElse(""); - String thatSrc1 = getSourceIp(that.getFirst()); - String thatSrc2 = that.getSecond().map(pp -> getSourceIp(pp)).orElse(""); - - // Replace IPs with hostnames if possible. - thisSrc1 = mapToHostname(thisSrc1); - thisSrc2 = mapToHostname(thisSrc2); - thatSrc1 = mapToHostname(thatSrc1); - thatSrc2 = mapToHostname(thatSrc2); - - if(!thisSrc1.equals(thatSrc1) || !thisSrc2.equals(thatSrc2)) { - // Distance is maximal if sources differ. - return Double.MAX_VALUE; - } - } - - // If the sources match, the distance is the Euclidean distance between each pair of packet lengths. - int thisLen1 = mPktPair.getFirst().getOriginalLength(); - // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done. - int thisLen2 = mPktPair.getSecond().map(pp -> pp.getOriginalLength()).orElse(0); - int thatLen1 = that.getFirst().getOriginalLength(); - // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done. - int thatLen2 = that.getSecond().map(pp -> pp.getOriginalLength()).orElse(0); - return Math.sqrt( - Math.pow(thisLen1 - thatLen1, 2) + - Math.pow(thisLen2 - thatLen2, 2) - ); - } - - @Override - public PcapPacketPair centroidOf(Collection p) { - // No notion of centroid in DBSCAN - throw new UnsupportedOperationException("Not implemented; no notion of a centroid in DBSCAN."); - } - - - private String mapToHostname(String ip) { - Set hostnames = mDnsMap.getHostnamesForIp(ip); - if (hostnames != null && hostnames.size() > 0) { - // append hostnames back-to-back separated by a delimiter if more than one item in set - // note: use sorted() to ensure that output remains consistent (as Set has no internal ordering of elements) - String result = hostnames.stream().sorted().collect(Collectors.joining(" ")); - if (hostnames.size() > 1) { - // One IP can map to multiple hostnames, although that is rare. For now just raise a warning. - String warningStr = String.format( - "%s.mapToHostname(): encountered an IP (%s) that maps to multiple hostnames (%s)", - getClass().getSimpleName(), ip, result); - System.err.println(warningStr); - } - return result; - } - // If unable to map to a hostname, return ip for ease of use; caller can overwrite input value, defaulting to - // the original value if no mapping is found: - // String src = ""; - // src = mapToHostname(src); // src is now either a hostname or the original ip. - return ip; - } - -} diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PrintUtils.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PrintUtils.java index ae6b578..c685d7d 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PrintUtils.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PrintUtils.java @@ -2,6 +2,7 @@ package edu.uci.iotproject.util; import edu.uci.iotproject.DnsMap; import edu.uci.iotproject.analysis.PcapPacketPair; +import org.apache.commons.math3.stat.clustering.Cluster; import java.util.Optional; import java.util.Set; @@ -81,4 +82,16 @@ public class PrintUtils { secondSrcCorS); } + /** + * Generate a string that summarizes/describes {@code cluster}. + * @param cluster The {@link Cluster} to summarize/describe. + * @return A string that summarizes/describes {@code cluster}. + */ + public static String toSummaryString(Cluster cluster) { + StringBuilder sb = new StringBuilder(); + for (PcapPacketPair ppp : cluster.getPoints()) { + sb.append(toCsv(ppp, ppp.getDnsMap()) + System.lineSeparator()); + } + return sb.toString(); + } } -- 2.34.1