import edu.uci.iotproject.comparison.seqalignment.SequenceExtraction;
import edu.uci.iotproject.io.TriggerTimesFileReader;
import edu.uci.iotproject.util.PrintUtils;
+import org.apache.commons.math3.stat.clustering.Cluster;
+import org.apache.commons.math3.stat.clustering.DBSCANClusterer;
import org.pcap4j.core.*;
import org.pcap4j.packet.namednumber.DataLinkType;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* This is a system that reads PCAP files to compare
// ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------
// Paths to input and output files (consider supplying these as arguments instead) and IP of the device for
// which traffic is to be extracted:
- String path = "/scratch/July-2018"; // Rahmadi
-// String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
+// String path = "/scratch/July-2018"; // Rahmadi
+ String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
boolean verbose = true;
final String onPairsPath = "/scratch/July-2018/on.txt";
final String offPairsPath = "/scratch/July-2018/off.txt";
// final String deviceIp = "192.168.1.140"; // .246 == phone; .140 == TP-Link bulb
// 7) Kwikset Doorlock August 6 experiment
- final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock.wlan1.local.pcap";
- final String outputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-processed.pcap";
- final String triggerTimesFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-aug-6-2018.timestamps";
- final String deviceIp = "192.168.1.246"; // .246 == phone; .142 == SmartThings Hub (note: use eth0 capture for this!)
+// final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock.wlan1.local.pcap";
+// final String outputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-processed.pcap";
+// final String triggerTimesFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-aug-6-2018.timestamps";
+// final String deviceIp = "192.168.1.246"; // .246 == phone; .142 == SmartThings Hub (note: use eth0 capture for this!)
// 8) Hue Bulb August 7 experiment
// final String inputPcapFile = path + "/2018-08/hue-bulb/hue-bulb.wlan1.local.pcap";
// final String deviceIp = "192.168.1.246"; // .246 == phone; .229 == sprinkler
// // 13) DLink siren August 14 experiment
-// final String inputPcapFile = path + "/2018-08/dlink-siren/dlink-siren.wlan1.local.pcap";
-// final String outputPcapFile = path + "/2018-08/dlink-siren/dlink-siren-processed.pcap";
-// final String triggerTimesFile = path + "/2018-08/dlink-siren/dlink-siren-aug-14-2018.timestamps";
-// final String deviceIp = "192.168.1.183"; // .246 == phone; .183 == siren
+ final String inputPcapFile = path + "/2018-08/dlink-siren/dlink-siren.wlan1.local.pcap";
+ final String outputPcapFile = path + "/2018-08/dlink-siren/dlink-siren-processed.pcap";
+ final String triggerTimesFile = path + "/2018-08/dlink-siren/dlink-siren-aug-14-2018.timestamps";
+ final String deviceIp = "192.168.1.183"; // .246 == phone; .183 == siren
// 14) Nest thermostat August 15 experiment
// final String inputPcapFile = path + "/2018-08/nest/nest.wlan1.local.pcap";
+ /*
+ * NOTE: no need to generate these more complex on/off maps that also contain mappings from hostname and
+ * sequence identifiers as we do not care about hostnames and sequences during clustering.
+ * We can simply use the UserAction->List<Conversation> map to generate ON/OFF groupings of conversations.
+ */
+ /*
// Contains all ON events: hostname -> sequence identifier -> list of conversations with that sequence
Map<String, Map<String, List<Conversation>>> ons = new HashMap<>();
// Contains all OFF events: hostname -> sequence identifier -> list of conversations with that sequence
});
});
});
+ */
+
+ // ================================================ CLUSTERING ================================================
+ // Note: no need to use the more convoluted on/off maps; can simply use the UserAction->List<Conversation> map
+ // when we don't care about hostnames and sequences (see comment earlier).
+        // Collect every conversation recorded for an ON event; the UserAction keys are only needed for the filter.
+        List<Conversation> onConversations = userActionToConversations.entrySet().stream()
+                .filter(entry -> entry.getKey().getType() == Type.TOGGLE_ON)
+                .flatMap(entry -> entry.getValue().stream())
+                .collect(Collectors.toList());
+        // Same for OFF events.
+        List<Conversation> offConversations = userActionToConversations.entrySet().stream()
+                .filter(entry -> entry.getKey().getType() == Type.TOGGLE_OFF)
+                .flatMap(entry -> entry.getValue().stream())
+                .collect(Collectors.toList());
+        // Turn each conversation into its packet pairs (TLS conversations use the TLS application data extractor)
+        // and flatten the per-conversation lists into a single list.
+        List<PcapPacketPair> onPairs = onConversations.stream()
+                .flatMap(conv -> (conv.isTls()
+                        ? TcpConversationUtils.extractTlsAppDataPacketPairs(conv)
+                        : TcpConversationUtils.extractPacketPairs(conv)).stream())
+                .collect(Collectors.toList());
+        List<PcapPacketPair> offPairs = offConversations.stream()
+                .flatMap(conv -> (conv.isTls()
+                        ? TcpConversationUtils.extractTlsAppDataPacketPairs(conv)
+                        : TcpConversationUtils.extractPacketPairs(conv)).stream())
+                .collect(Collectors.toList());
+        // Note: every PcapPacketPair needs the DnsMap if we want to use the IP/hostname-sensitive distance.
+        Stream.concat(onPairs.stream(), offPairs.stream()).forEach(pair -> pair.setDnsMap(dnsMap));
+        // Cluster the packet pairs logged as part of all ON events (DBSCAN with eps = 10.0, minPts = 5).
+        DBSCANClusterer<PcapPacketPair> onClusterer = new DBSCANClusterer<>(10.0, 5);
+        List<Cluster<PcapPacketPair>> onClusters = onClusterer.cluster(onPairs);
+        // Cluster the packet pairs logged as part of all OFF events, using the same parameters.
+        DBSCANClusterer<PcapPacketPair> offClusterer = new DBSCANClusterer<>(10.0, 5);
+        List<Cluster<PcapPacketPair>> offClusters = offClusterer.cluster(offPairs);
+ // Output clusters
+ System.out.println("========================================");
+ System.out.println(" Clustering results for ON ");
+ System.out.println(" Number of clusters: " + onClusters.size());
+ int count = 0;
+ for (Cluster<PcapPacketPair> c : onClusters) {
+ System.out.println(String.format("<<< Cluster #%02d (%03d points) >>>", ++count, c.getPoints().size()));
+ System.out.print(PrintUtils.toSummaryString(c));
+ }
+ System.out.println("========================================");
+ System.out.println(" Clustering results for OFF ");
+ System.out.println(" Number of clusters: " + offClusters.size());
+ count = 0;
+ for (Cluster<PcapPacketPair> c : offClusters) {
+ System.out.println(String.format("<<< Cluster #%03d (%06d points) >>>", ++count, c.getPoints().size()));
+ System.out.print(PrintUtils.toSummaryString(c));
+ }
+ System.out.println("========================================");
+ // ============================================================================================================
-
+ /*
System.out.println("==== ON ====");
// Print out all the pairs into a file for ON events
File fileOnEvents = new File(onPairsPath);
}
}
pwOff.close();
-
+ */
// // ================================================================================================
// // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
package edu.uci.iotproject.analysis;
+import edu.uci.iotproject.DnsMap;
import edu.uci.iotproject.util.PcapPacketUtils;
+import org.apache.commons.math3.stat.clustering.Clusterable;
import org.pcap4j.core.PcapPacket;
import java.net.InetAddress;
import java.net.UnknownHostException;
+import java.util.Collection;
import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static edu.uci.iotproject.util.PcapPacketUtils.getSourceIp;
/**
* <p>
* @author Janus Varmarken {@literal <jvarmark@uci.edu>}
* @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
*/
-public class PcapPacketPair {
+public class PcapPacketPair implements Clusterable<PcapPacketPair> {
+
+ /**
+ * If {@code true}, {@link #distanceFrom(PcapPacketPair)} will only consider if the sources of the two packets in
+ * the {@link PcapPacketPair}s being compared match in terms of whether the IP is a local or a remote IP. It will
+ * <em>not</em> check if the IPs/hostnames are actually the same. Set to {@code false} to make the comparison more
+ * strict, i.e., to enforce the requirement that the respective IPs (or hostnames) in the packets of the two
+ * {@link PcapPacketPair}s must be identical.
+ */
+ private static final boolean SIMPLIFIED_SOURCE_COMPARISON = true;
private final PcapPacket mFirst;
private final Optional<PcapPacket> mSecond;
+ /**
+ * IP to hostname mappings.
+ * Allows for grouping packets with different source IPs that map to the same hostname into one cluster.
+ * Declared without an initializer; it is assigned through {@link #setDnsMap(DnsMap)}.
+ */
+ private DnsMap mDnsMap;
+
public PcapPacketPair(PcapPacket first, PcapPacket second) {
mFirst = first;
mSecond = Optional.ofNullable(second);
}
}
+    /**
+     * Returns the {@link DnsMap} attached to this pair, i.e., the map that is consulted for hostname mappings when
+     * performing IP/hostname-sensitive clustering.
+     * @return the attached {@link DnsMap}. The backing field has no initializer, so this is presumably {@code null}
+     *         if no map has been set yet.
+     */
+    public DnsMap getDnsMap() {
+        return this.mDnsMap;
+    }
+
+    /**
+     * Attaches the {@link DnsMap} that is consulted for hostname mappings when performing IP/hostname-sensitive
+     * clustering, replacing any previously attached map.
+     * @param dnsMap the {@code DnsMap} to consult for hostname mappings from now on.
+     */
+    public void setDnsMap(final DnsMap dnsMap) {
+        this.mDnsMap = dnsMap;
+    }
+
@Override
public String toString() {
return String.format("%d, %s",
getSecond().map(pkt -> Integer.toString(pkt.getOriginalLength())).orElse("null"));
}
+ // =================================================================================================================
+ // Begin implementation of org.apache.commons.math3.stat.clustering.Clusterable interface
+    /**
+     * Computes the distance between this pair and {@code that} for the clustering API.
+     * <p>
+     * The two pairs are first checked for compatibility. In simplified mode the client/server direction of both
+     * packets must agree; in strict mode the source IPs (replaced by hostnames where a mapping exists) of both
+     * packets must be equal. Incompatible pairs are {@link Double#MAX_VALUE} apart. Compatible pairs are separated
+     * by the Euclidean distance between their (first length, second length) points, where a missing second packet
+     * counts as length 0.
+     *
+     * @param that the pair to measure the distance to.
+     * @return {@link Double#MAX_VALUE} if the pairs are incompatible, otherwise the Euclidean distance between the
+     *         packet lengths of the two pairs.
+     */
+    @Override
+    public double distanceFrom(PcapPacketPair that) {
+        if (!SIMPLIFIED_SOURCE_COMPARISON) {
+            // Strict mode enabled: IPs/hostnames must match!
+            // Source IPs of both packets of each pair; a missing second packet is represented by "".
+            String ownFirstSrc = getSourceIp(this.getFirst());
+            String ownSecondSrc = this.getSecond().map(pkt -> getSourceIp(pkt)).orElse("");
+            String otherFirstSrc = getSourceIp(that.getFirst());
+            String otherSecondSrc = that.getSecond().map(pkt -> getSourceIp(pkt)).orElse("");
+
+            // Swap in hostnames wherever a mapping is available.
+            ownFirstSrc = mapToHostname(ownFirstSrc);
+            ownSecondSrc = mapToHostname(ownSecondSrc);
+            otherFirstSrc = mapToHostname(otherFirstSrc);
+            otherSecondSrc = mapToHostname(otherSecondSrc);
+
+            boolean sourcesMatch = ownFirstSrc.equals(otherFirstSrc) && ownSecondSrc.equals(otherSecondSrc);
+            if (!sourcesMatch) {
+                // Distance is maximal if sources differ.
+                return Double.MAX_VALUE;
+            }
+        } else {
+            // Simplified mode: only the direction of the packets (client-to-server or server-to-client) has to
+            // match; IPs and hostnames are ignored.
+            boolean directionsMatch = this.isFirstClient() == that.isFirstClient()
+                    && this.isSecondClient() == that.isSecondClient();
+            if (!directionsMatch) {
+                // Distance is maximal if mismatch in direction of packets.
+                return Double.MAX_VALUE;
+            }
+        }
+
+        // The pairs are compatible: measure the Euclidean distance between their packet lengths.
+        // TODO should discard pairs w/o second packet from clustering; replace the orElse(0) fallbacks below with
+        //  getSecond().get() when done.
+        int ownFirstLen = this.getFirst().getOriginalLength();
+        int ownSecondLen = this.getSecond().map(PcapPacket::getOriginalLength).orElse(0);
+        int otherFirstLen = that.getFirst().getOriginalLength();
+        int otherSecondLen = that.getSecond().map(PcapPacket::getOriginalLength).orElse(0);
+
+        double firstLenDelta = ownFirstLen - otherFirstLen;
+        double secondLenDelta = ownSecondLen - otherSecondLen;
+        return Math.sqrt(Math.pow(firstLenDelta, 2) + Math.pow(secondLenDelta, 2));
+    }
+
+    /**
+     * Unsupported: this class is clustered with DBSCAN, which has no notion of a centroid.
+     *
+     * @param p ignored.
+     * @return never returns normally.
+     * @throws UnsupportedOperationException always.
+     */
+    @Override
+    public PcapPacketPair centroidOf(Collection<PcapPacketPair> p) {
+        final String reason = "Not implemented; no notion of a centroid in DBSCAN.";
+        throw new UnsupportedOperationException(reason);
+    }
+ // End implementation of org.apache.commons.math3.stat.clustering.Clusterable interface
+ // =================================================================================================================
+
+    /**
+     * Replaces {@code ip} with the hostname(s) that the attached {@link DnsMap} records for it.
+     * <p>
+     * If the IP maps to several hostnames, they are sorted and joined with a single space so that the result is
+     * stable across calls, and a warning is written to {@code System.err}. If no {@link DnsMap} is attached, or it
+     * has no hostname for {@code ip}, the IP itself is returned, so callers can overwrite their input value:
+     * <pre>
+     *     String src = "&lt;some-ip&gt;";
+     *     src = mapToHostname(src); // src is now either a hostname or the original ip.
+     * </pre>
+     *
+     * @param ip the IP to look up.
+     * @return the hostname(s) for {@code ip}, or {@code ip} itself if no mapping is available.
+     */
+    private String mapToHostname(String ip) {
+        if (mDnsMap == null) {
+            // The DnsMap is only attached through setDnsMap(), so it may be absent; treat that like "no mapping
+            // found" instead of failing with a NullPointerException.
+            return ip;
+        }
+        Set<String> hostnames = mDnsMap.getHostnamesForIp(ip);
+        if (hostnames == null || hostnames.isEmpty()) {
+            // Unable to map to a hostname: default to the original value.
+            return ip;
+        }
+        // Append hostnames back-to-back separated by a delimiter if more than one item in set.
+        // Note: use sorted() to ensure that output remains consistent (as Set has no internal ordering of elements).
+        String result = hostnames.stream().sorted().collect(Collectors.joining(" "));
+        if (hostnames.size() > 1) {
+            // One IP can map to multiple hostnames, although that is rare. For now just raise a warning.
+            String warningStr = String.format(
+                    "%s.mapToHostname(): encountered an IP (%s) that maps to multiple hostnames (%s)",
+                    getClass().getSimpleName(), ip, result);
+            System.err.println(warningStr);
+        }
+        return result;
+    }
+
}
+++ /dev/null
-package edu.uci.iotproject.analysis.clustering;
-
-import edu.uci.iotproject.DnsMap;
-import edu.uci.iotproject.analysis.PcapPacketPair;
-import org.apache.commons.math3.stat.clustering.Clusterable;
-
-import java.util.Collection;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import static edu.uci.iotproject.util.PcapPacketUtils.getSourceIp;
-
-/**
- * A wrapper for a {@link PcapPacketPair}, allowing it to be clustered using
- * {@link org.apache.commons.math3.stat.clustering.DBSCANClusterer}. Specifically, this wrapper implements
- * {@link org.apache.commons.math3.stat.clustering.Clusterable}, so that the interface of {@link PcapPacketPair}
- * is not cluttered up by this helper method of the clustering API.
- *
- * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
- * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
- */
-public class PcapPacketPairWrapper implements Clusterable<PcapPacketPair> {
-
- /**
- * If {@code true}, {@link #distanceFrom(PcapPacketPair)} will only consider if the sources of the two packets in
- * the {@link PcapPacketPair}s being compared match in terms of whether the IP is a local or a remote IP. It will
- * <em>not</em> check if the IPs/hostnames are actually the same. Set to {@code false} to make the comparison more
- * strict, i.e., to enforce the requirement that the respective IPs (or hostnames) in the packets of the two
- * {@link PcapPacketPair}s must be identical.
- */
- private static final boolean SIMPLIFIED_SOURCE_COMPARISON = true;
-
- /**
- * The wrapped {@link PcapPacketPair}.
- */
- private final PcapPacketPair mPktPair;
-
- /**
- * IP to hostname mappings.
- * Allows for grouping packets with different source IPs that map to the same hostname into one cluster.
- */
- private final DnsMap mDnsMap;
-
- public PcapPacketPairWrapper(PcapPacketPair wrappedObject, DnsMap ipHostnameMap) {
- mPktPair = wrappedObject;
- mDnsMap = ipHostnameMap;
- }
-
- @Override
- public double distanceFrom(PcapPacketPair that) {
- if (SIMPLIFIED_SOURCE_COMPARISON) {
- // Direction of packets in terms of client-to-server or server-to-client must match, but we don't care about
- // IPs and hostnames
- if (mPktPair.isFirstClient() != that.isFirstClient() ||
- mPktPair.isSecondClient() != that.isSecondClient()) {
- // Distance is maximal if mismatch in direction of packets
- return Double.MAX_VALUE;
- }
- } else {
- // Strict mode enabled: IPs/hostnames must match!
- // Extract src ips of both packets of each pair.
- String thisSrc1 = getSourceIp(mPktPair.getFirst());
- String thisSrc2 = mPktPair.getSecond().map(pp -> getSourceIp(pp)).orElse("");
- String thatSrc1 = getSourceIp(that.getFirst());
- String thatSrc2 = that.getSecond().map(pp -> getSourceIp(pp)).orElse("");
-
- // Replace IPs with hostnames if possible.
- thisSrc1 = mapToHostname(thisSrc1);
- thisSrc2 = mapToHostname(thisSrc2);
- thatSrc1 = mapToHostname(thatSrc1);
- thatSrc2 = mapToHostname(thatSrc2);
-
- if(!thisSrc1.equals(thatSrc1) || !thisSrc2.equals(thatSrc2)) {
- // Distance is maximal if sources differ.
- return Double.MAX_VALUE;
- }
- }
-
- // If the sources match, the distance is the Euclidean distance between each pair of packet lengths.
- int thisLen1 = mPktPair.getFirst().getOriginalLength();
- // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done.
- int thisLen2 = mPktPair.getSecond().map(pp -> pp.getOriginalLength()).orElse(0);
- int thatLen1 = that.getFirst().getOriginalLength();
- // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done.
- int thatLen2 = that.getSecond().map(pp -> pp.getOriginalLength()).orElse(0);
- return Math.sqrt(
- Math.pow(thisLen1 - thatLen1, 2) +
- Math.pow(thisLen2 - thatLen2, 2)
- );
- }
-
- @Override
- public PcapPacketPair centroidOf(Collection<PcapPacketPair> p) {
- // No notion of centroid in DBSCAN
- throw new UnsupportedOperationException("Not implemented; no notion of a centroid in DBSCAN.");
- }
-
-
- private String mapToHostname(String ip) {
- Set<String> hostnames = mDnsMap.getHostnamesForIp(ip);
- if (hostnames != null && hostnames.size() > 0) {
- // append hostnames back-to-back separated by a delimiter if more than one item in set
- // note: use sorted() to ensure that output remains consistent (as Set has no internal ordering of elements)
- String result = hostnames.stream().sorted().collect(Collectors.joining(" "));
- if (hostnames.size() > 1) {
- // One IP can map to multiple hostnames, although that is rare. For now just raise a warning.
- String warningStr = String.format(
- "%s.mapToHostname(): encountered an IP (%s) that maps to multiple hostnames (%s)",
- getClass().getSimpleName(), ip, result);
- System.err.println(warningStr);
- }
- return result;
- }
- // If unable to map to a hostname, return ip for ease of use; caller can overwrite input value, defaulting to
- // the original value if no mapping is found:
- // String src = "<some-ip>";
- // src = mapToHostname(src); // src is now either a hostname or the original ip.
- return ip;
- }
-
-}
import edu.uci.iotproject.DnsMap;
import edu.uci.iotproject.analysis.PcapPacketPair;
+import org.apache.commons.math3.stat.clustering.Cluster;
import java.util.Optional;
import java.util.Set;
secondSrcCorS);
}
+    /**
+     * Builds a textual summary of {@code cluster}: the CSV form of every point in the cluster, each followed by the
+     * platform line separator.
+     * @param cluster The {@link Cluster} to summarize/describe.
+     * @return A string that summarizes/describes {@code cluster}; empty if the cluster has no points.
+     */
+    public static String toSummaryString(Cluster<PcapPacketPair> cluster) {
+        final String newline = System.lineSeparator();
+        StringBuilder summary = new StringBuilder();
+        for (PcapPacketPair point : cluster.getPoints()) {
+            // Each pair is rendered using the DnsMap attached to that pair.
+            summary.append(toCsv(point, point.getDnsMap())).append(newline);
+        }
+        return summary.toString();
+    }
}