1 package edu.uci.iotproject.analysis.clustering;
import edu.uci.iotproject.DnsMap;
import edu.uci.iotproject.analysis.PcapPacketPair;
import org.apache.commons.math3.stat.clustering.Clusterable;

import java.util.Collection;
import java.util.Set;
import java.util.stream.Collectors;

import static edu.uci.iotproject.util.PcapPacketUtils.getSourceIp;
14 * A wrapper for a {@link PcapPacketPair}, allowing it to be clustered using
15 * {@link org.apache.commons.math3.stat.clustering.DBSCANClusterer}. Specifically, this wrapper implements
16 * {@link org.apache.commons.math3.stat.clustering.Clusterable}, so that the interface of {@link PcapPacketPair}
17 * is not cluttered up by this helper method of the clustering API.
19 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
20 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
22 public class PcapPacketPairWrapper implements Clusterable<PcapPacketPair> {
25 * If {@code true}, {@link #distanceFrom(PcapPacketPair)} will only consider if the sources of the two packets in
26 * the {@link PcapPacketPair}s being compared match in terms of whether the IP is a local or a remote IP. It will
27 * <em>not</em> check if the IPs/hostnames are actually the same. Set to {@code false} to make the comparison more
28 * strict, i.e., to enforce the requirement that the respective IPs (or hostnames) in the packets of the two
29 * {@link PcapPacketPair}s must be identical.
31 private static final boolean SIMPLIFIED_SOURCE_COMPARISON = true;
34 * The wrapped {@link PcapPacketPair}.
36 private final PcapPacketPair mPktPair;
39 * IP to hostname mappings.
40 * Allows for grouping packets with different source IPs that map to the same hostname into one cluster.
42 private final DnsMap mDnsMap;
44 public PcapPacketPairWrapper(PcapPacketPair wrappedObject, DnsMap ipHostnameMap) {
45 mPktPair = wrappedObject;
46 mDnsMap = ipHostnameMap;
50 public double distanceFrom(PcapPacketPair that) {
51 if (SIMPLIFIED_SOURCE_COMPARISON) {
52 // Direction of packets in terms of client-to-server or server-to-client must match, but we don't care about
54 if (mPktPair.isFirstClient() != that.isFirstClient() ||
55 mPktPair.isSecondClient() != that.isSecondClient()) {
56 // Distance is maximal if mismatch in direction of packets
57 return Double.MAX_VALUE;
60 // Strict mode enabled: IPs/hostnames must match!
61 // Extract src ips of both packets of each pair.
62 String thisSrc1 = getSourceIp(mPktPair.getFirst());
63 String thisSrc2 = mPktPair.getSecond().map(pp -> getSourceIp(pp)).orElse("");
64 String thatSrc1 = getSourceIp(that.getFirst());
65 String thatSrc2 = that.getSecond().map(pp -> getSourceIp(pp)).orElse("");
67 // Replace IPs with hostnames if possible.
68 thisSrc1 = mapToHostname(thisSrc1);
69 thisSrc2 = mapToHostname(thisSrc2);
70 thatSrc1 = mapToHostname(thatSrc1);
71 thatSrc2 = mapToHostname(thatSrc2);
73 if(!thisSrc1.equals(thatSrc1) || !thisSrc2.equals(thatSrc2)) {
74 // Distance is maximal if sources differ.
75 return Double.MAX_VALUE;
79 // If the sources match, the distance is the Euclidean distance between each pair of packet lengths.
80 int thisLen1 = mPktPair.getFirst().getOriginalLength();
81 // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done.
82 int thisLen2 = mPktPair.getSecond().map(pp -> pp.getOriginalLength()).orElse(0);
83 int thatLen1 = that.getFirst().getOriginalLength();
84 // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done.
85 int thatLen2 = that.getSecond().map(pp -> pp.getOriginalLength()).orElse(0);
87 Math.pow(thisLen1 - thatLen1, 2) +
88 Math.pow(thisLen2 - thatLen2, 2)
93 public PcapPacketPair centroidOf(Collection<PcapPacketPair> p) {
94 // No notion of centroid in DBSCAN
95 throw new UnsupportedOperationException("Not implemented; no notion of a centroid in DBSCAN.");
99 private String mapToHostname(String ip) {
100 Set<String> hostnames = mDnsMap.getHostnamesForIp(ip);
101 if (hostnames != null && hostnames.size() > 0) {
102 // append hostnames back-to-back separated by a delimiter if more than one item in set
103 // note: use sorted() to ensure that output remains consistent (as Set has no internal ordering of elements)
104 String result = hostnames.stream().sorted().collect(Collectors.joining(" "));
105 if (hostnames.size() > 1) {
106 // One IP can map to multiple hostnames, although that is rare. For now just raise a warning.
107 String warningStr = String.format(
108 "%s.mapToHostname(): encountered an IP (%s) that maps to multiple hostnames (%s)",
109 getClass().getSimpleName(), ip, result);
110 System.err.println(warningStr);
114 // If unable to map to a hostname, return ip for ease of use; caller can overwrite input value, defaulting to
115 // the original value if no mapping is found:
116 // String src = "<some-ip>";
117 // src = mapToHostname(src); // src is now either a hostname or the original ip.