1 package edu.uci.iotproject.analysis.clustering;
3 import edu.uci.iotproject.DnsMap;
4 import edu.uci.iotproject.analysis.PcapPacketPair;
5 import org.apache.commons.math3.stat.clustering.Clusterable;
7 import java.util.Collection;
9 import java.util.stream.Collectors;
11 import static edu.uci.iotproject.util.PcapPacketUtils.getSourceIp;
/**
 * A wrapper for a {@link PcapPacketPair}, allowing it to be clustered using
 * {@link org.apache.commons.math3.stat.clustering.DBSCANClusterer}. Specifically, this wrapper implements
 * {@link org.apache.commons.math3.stat.clustering.Clusterable} so that the interface of {@link PcapPacketPair}
 * is not cluttered with the helper methods required by the clustering API.
 *
 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
 */
22 public class PcapPacketPairWrapper implements Clusterable<PcapPacketPair> {
/**
 * The wrapped {@link PcapPacketPair}.
 */
27 private final PcapPacketPair mPktPair;
/**
 * IP to hostname mappings.
 * Allows for grouping packets with different source IPs that map to the same hostname into one cluster.
 */
33 private final DnsMap mDnsMap;
/**
 * Creates a wrapper around a packet pair so that the pair can take part in clustering.
 *
 * @param wrappedObject the {@link PcapPacketPair} to wrap.
 * @param ipHostnameMap the IP to hostname mappings consulted when comparing packet sources.
 */
public PcapPacketPairWrapper(PcapPacketPair wrappedObject, DnsMap ipHostnameMap) {
    this.mPktPair = wrappedObject;
    this.mDnsMap = ipHostnameMap;
}
41 public double distanceFrom(PcapPacketPair that) {
42 // Extract src ips of both packets of each pair.
43 String thisSrc1 = getSourceIp(mPktPair.getFirst());
44 String thisSrc2 = mPktPair.getSecond().map(pp -> getSourceIp(pp)).orElse("");
45 String thatSrc1 = getSourceIp(that.getFirst());
46 String thatSrc2 = that.getSecond().map(pp -> getSourceIp(pp)).orElse("");
48 // Replace IPs with hostnames if possible.
49 thisSrc1 = mapToHostname(thisSrc1);
50 thisSrc2 = mapToHostname(thisSrc2);
51 thatSrc1 = mapToHostname(thatSrc1);
52 thatSrc2 = mapToHostname(thatSrc2);
54 if(!thisSrc1.equals(thatSrc1) || !thisSrc2.equals(thatSrc2)) {
55 // Distance is maximal if sources differ.
56 return Double.MAX_VALUE;
59 // If the sources match, the distance is the Euclidean distance between each pair of packet lengths.
60 int thisLen1 = mPktPair.getFirst().getOriginalLength();
61 // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done.
62 int thisLen2 = mPktPair.getSecond().map(pp -> pp.getOriginalLength()).orElse(0);
63 int thatLen1 = that.getFirst().getOriginalLength();
64 // TODO should discard pairs w/o second packet from clustering; replace below with getSecond().get() when done.
65 int thatLen2 = that.getSecond().map(pp -> pp.getOriginalLength()).orElse(0);
67 Math.pow(thisLen1 - thatLen1, 2) +
68 Math.pow(thisLen2 - thatLen2, 2)
73 public PcapPacketPair centroidOf(Collection<PcapPacketPair> p) {
74 // No notion of centroid in DBSCAN
75 throw new UnsupportedOperationException("Not implemented; no notion of a centroid in DBSCAN.");
79 private String mapToHostname(String ip) {
80 Set<String> hostnames = mDnsMap.getHostnamesForIp(ip);
81 if (hostnames != null && hostnames.size() > 0) {
82 // append hostnames back-to-back separated by a delimiter if more than one item in set
83 // note: use sorted() to ensure that output remains consistent (as Set has no internal ordering of elements)
84 String result = hostnames.stream().sorted().collect(Collectors.joining(" "));
85 if (hostnames.size() > 1) {
86 // One IP can map to multiple hostnames, although that is rare. For now just raise a warning.
87 String warningStr = String.format(
88 "%s.mapToHostname(): encountered an IP (%s) that maps to multiple hostnames (%s)",
89 getClass().getSimpleName(), ip, result);
90 System.err.println(warningStr);
94 // If unable to map to a hostname, return ip for ease of use; caller can overwrite input value, defaulting to
95 // the original value if no mapping is found:
96 // String src = "<some-ip>";
97 // src = mapToHostname(src); // src is now either a hostname or the original ip.