1 package edu.uci.iotproject.detection;
3 import edu.uci.iotproject.Conversation;
4 import edu.uci.iotproject.TcpReassembler;
5 import edu.uci.iotproject.analysis.TcpConversationUtils;
6 import edu.uci.iotproject.io.PcapHandleReader;
7 import edu.uci.iotproject.util.PrintUtils;
8 import org.pcap4j.core.*;
10 import java.time.ZoneId;
12 import java.util.stream.Collectors;
14 import static edu.uci.iotproject.util.PcapPacketUtils.*;
17 * Searches a traffic trace for sequences of packets that "belong to" a given cluster (in other words, attempts to classify
18 * traffic as pertaining to a given cluster).
20 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
21 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
23 public class ClusterMatcher implements PacketListener {
26 public static void main(String[] args) throws PcapNativeException, NotOpenException {
28 // String path = "/scratch/July-2018"; // Rahmadi
29 String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
30 final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
31 final String signatureFile = path + "/2018-07/dlink/offSignature1.sig";
33 List<List<PcapPacket>> signature = PrintUtils.deserializeClustersFromFile(signatureFile);
34 ClusterMatcher clusterMatcher = new ClusterMatcher(signature, null,
35 (sig, match) -> System.out.println(
36 String.format("[ !!! SIGNATURE DETECTED AT %s !!! ]",
37 match.get(0).getTimestamp().atZone(ZoneId.of("America/Los_Angeles")))
43 handle = Pcaps.openOffline(inputPcapFile, PcapHandle.TimestampPrecision.NANO);
44 } catch (PcapNativeException pne) {
45 handle = Pcaps.openOffline(inputPcapFile);
47 PcapHandleReader reader = new PcapHandleReader(handle, p -> true, clusterMatcher);
48 reader.readFromHandle();
49 clusterMatcher.performDetection();
53 * The cluster that describes the sequence of packets that this {@link ClusterMatcher} is trying to detect in the
56 private final List<List<PcapPacket>> mCluster;
59 * The ordered directions of packets in the sequences that make up {@link #mCluster}.
61 private final Conversation.Direction[] mClusterMemberDirections;
64 * For reassembling the observed traffic into TCP connections.
66 private final TcpReassembler mTcpReassembler = new TcpReassembler();
69 * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
71 private final String mRouterWanIp;
73 private final ClusterMatchObserver[] mObservers;
76 * Create a {@link ClusterMatcher}.
77 * @param cluster The cluster that traffic is matched against.
78 * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
79 * determining the direction of packets).
80 * @param detectionObservers Client code that wants to get notified whenever the {@link ClusterMatcher} detects that
81 * (a subset of) the examined traffic is similar to the traffic that makes up
82 * {@code cluster}, i.e., when the examined traffic is classified as pertaining to
85 public ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, ClusterMatchObserver... detectionObservers) {
86 mCluster = Collections.unmodifiableList(Objects.requireNonNull(cluster, "cluster cannot be null"));
87 mObservers = Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
88 if (mCluster.isEmpty() || mCluster.stream().anyMatch(inner -> inner.isEmpty())) {
89 throw new IllegalArgumentException("cluster is empty (or contains an empty inner List)");
91 if (mObservers.length == 0) {
92 throw new IllegalArgumentException("no detectionObservers provided");
94 mRouterWanIp = routerWanIp;
95 // Build the cluster members' direction sequence.
96 // Note: assumes that the provided cluster was captured within the local network (routerWanIp is set to null).
97 mClusterMemberDirections = getPacketDirections(mCluster.get(0), null);
99 * Enforce restriction on cluster members: all representatives must exhibit the same direction pattern and
100 * contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled later
101 * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
102 * in order to ensure correctness, especially during the development/debugging phase.
104 if (mCluster.stream().
105 anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
106 throw new IllegalArgumentException(
107 "cluster members must contain the same number of packets and exhibit the same packet direction " +
114 public void gotPacket(PcapPacket packet) {
115 // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
116 mTcpReassembler.gotPacket(packet);
120 * Get the cluster that describes the packet sequence that this {@link ClusterMatcher} is searching for.
121 * @return the cluster that describes the packet sequence that this {@link ClusterMatcher} is searching for.
123 public List<List<PcapPacket>> getCluster() {
127 public void performDetection() {
129 * Let's start out simple by building a version that only works for signatures that do not span across multiple
130 * TCP conversations...
132 for (Conversation c : mTcpReassembler.getTcpConversations()) {
133 if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
134 // Skip empty conversations.
138 /*List<PcapPacket> listPP = c.getPackets();
139 if(listPP.size() > 1000) {
140 for (PcapPacket pp : listPP) {
141 if (pp.length() == 639) {
142 boolean test = c.isTls();
143 System.out.println("Sequence has 639! " + test);
147 for (List<PcapPacket> signatureSequence : mCluster) {
148 if (isTlsSequence(signatureSequence) != c.isTls()) {
149 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
152 // Fetch set of packets to examine based on TLS or not.
153 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
155 * Note: we embed the attempt to detect the signature sequence in a loop in order to capture those cases
156 * where the same signature sequence appears multiple times in one Conversation.
158 * Note: as the cluster can be made up of identical sequences, we must keep track of whether we detected
159 * a match and, if so, break the inner for-each loop in order to prevent raising an alarm for each
160 * cluster-member (prevent duplicate detections of the same event). However, a negative side-effect of
161 * this is that, in doing so, we will also skip searching for subsequent different cluster members in
162 * the current conversation if the current cluster member is a match.
164 * Note: since we expect all sequences that together make up the signature to exhibit the same direction
165 * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
166 * won't have to be recomputed internally in each call to findSubsequenceInSequence().
168 Optional<List<PcapPacket>> match;
169 boolean matchFound = false;
170 while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null)).
173 List<PcapPacket> matchSeq = match.get();
174 // Notify observers about the match.
175 Arrays.stream(mObservers).forEach(o -> o.onMatch(ClusterMatcher.this, matchSeq));
177 * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
178 * signature sequence.
180 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size()-1));
181 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
182 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
185 // Break inner for-each loop in order to avoid duplicate detection of same event (see comment above)
191 * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
192 * not manage to capture every single mutation of the sequence during training.
194 * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
195 * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
202 * Checks if {@code sequence} is a sequence of TLS packets. Note: the current implementation relies on inspection
203 * of the port numbers when deciding between TLS vs. non-TLS. Therefore, only the first packet of {@code sequence}
204 * is examined as it is assumed that all packets in {@code sequence} pertain to the same {@link Conversation} and
205 * hence share the same set of two src/dst port numbers (albeit possibly alternating between which one is the src
206 * and which one is the dst, as packets in {@code sequence} may be in alternating directions).
207 * @param sequence The sequence of packets for which it is to be determined if it is a sequence of TLS packets or
209 * @return {@code true} if {@code sequence} is a sequence of TLS packets, {@code false} otherwise.
211 private boolean isTlsSequence(List<PcapPacket> sequence) {
212 // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
213 PcapPacket firstPkt = sequence.get(0);
214 int srcPort = getSourcePort(firstPkt);
215 int dstPort = getDestinationPort(firstPkt);
216 return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
220 * Examine if a given sequence of packets ({@code sequence}) contains a given shorter sequence of packets
221 * ({@code subsequence}). Note: the current implementation actually searches for a substring as it does not allow
222 * for interleaving packets in {@code sequence} that are not in {@code subsequence}; for example, if
223 * {@code subsequence} consists of packet lengths [2, 3, 5] and {@code sequence} consists of packet lengths
224 * [2, 3, 4, 5], the result will be that there is no match (because of the interleaving 4). If we are to allow
225 * interleaving packets, we need a modified version of
226 * <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
228 * @param subsequence The sequence to search for.
229 * @param sequence The sequence to search.
230 * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
231 * {@code subsequenceDirections[i]} is the direction of the packet returned by
232 * {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
233 * internally compute the packet directions.
234 * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
235 * {@code sequenceDirections[i]} is the direction of the packet returned by
236 * {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
237 * compute the packet directions.
239 * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
240 * {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
242 private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
243 List<PcapPacket> sequence,
244 Conversation.Direction[] subsequenceDirections,
245 Conversation.Direction[] sequenceDirections) {
246 if (sequence.size() < subsequence.size()) {
247 // If subsequence is longer, it cannot be contained in sequence.
248 return Optional.empty();
250 if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
251 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
252 return Optional.empty();
254 // If packet directions have not been precomputed by calling code, we need to construct them.
255 if (subsequenceDirections == null) {
256 subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
258 if (sequenceDirections == null) {
259 sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
263 while (seqIdx < sequence.size()) {
264 PcapPacket subseqPkt = subsequence.get(subseqIdx);
265 PcapPacket seqPkt = sequence.get(seqIdx);
266 // We only have a match if packet lengths and directions match.
267 if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
268 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
269 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
272 if (subseqIdx == subsequence.size()) {
273 // We managed to match the entire subsequence in sequence.
274 // Return the sublist of sequence that matches subsequence.
277 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
280 return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
286 * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
287 * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
288 * leave seqIdx untouched.
293 * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
294 * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
301 return Optional.empty();
305 * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
306 * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
307 * index in the input list.
308 * @param packets The list of packets for which to construct a corresponding array of packet directions.
309 * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
310 * the traffic is captured just outside the local network (at the ISP side of the router). Set to
311 * {@code null} if {@code packets} stem from traffic captured within the local network.
312 * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
313 * corresponding index in {@code packets}.
315 private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
316 Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
317 for (int i = 0; i < packets.size(); i++) {
318 PcapPacket pkt = packets.get(i);
319 if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
320 // Sanity check: we shouldn't be processing loopback traffic
321 throw new AssertionError("loopback traffic detected");
323 if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
324 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
325 } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
326 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
328 throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");
335 * Interface used by client code to register for receiving a notification whenever the {@link ClusterMatcher}
336 * detects traffic that is similar to the traffic that makes up the cluster returned by
337 * {@link ClusterMatcher#getCluster()}.
339 interface ClusterMatchObserver {
341 * Callback that is invoked whenever a sequence that is similar to a sequence associated with the cluster (i.e.,
342 * a sequence is a member of the cluster) is detected in the traffic that the associated {@link ClusterMatcher}
344 * @param clusterMatcher The {@link ClusterMatcher} that detected a match (classified traffic as pertaining to
345 * its associated cluster).
346 * @param match The traffic that was deemed to match the cluster associated with {@code clusterMatcher}.
348 void onMatch(ClusterMatcher clusterMatcher, List<PcapPacket> match);