1 package edu.uci.iotproject.detection.layer3;
3 import edu.uci.iotproject.detection.AbstractClusterMatcher;
4 import edu.uci.iotproject.detection.ClusterMatcherObserver;
5 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
6 import edu.uci.iotproject.trafficreassembly.layer3.TcpReassembler;
7 import edu.uci.iotproject.analysis.TcpConversationUtils;
8 import edu.uci.iotproject.io.PcapHandleReader;
9 import edu.uci.iotproject.util.PrintUtils;
10 import org.pcap4j.core.*;
12 import java.time.ZoneId;
14 import java.util.stream.Collectors;
16 import static edu.uci.iotproject.util.PcapPacketUtils.*;
19 * Searches a traffic trace for sequences of packets that "belong to" a given cluster (in other words, attempts to classify
20 * traffic as pertaining to a given cluster).
22 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
23 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
25 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
28 public static void main(String[] args) throws PcapNativeException, NotOpenException {
// Every statement shown below is commented out. It is sample usage that loads a serialized signature,
// opens a pcap file (nanosecond timestamp precision first, default precision as the fallback), feeds the
// packets to a Layer3ClusterMatcher through a PcapHandleReader and finally runs detection.
// NOTE(review): the sample builds the matcher with (signature, null, observer), which does not line up with
// the (cluster, routerWanIp, isRangeBased, eps, observers...) constructor declared in this class - update
// the sample or delete it.
30 // String path = "/scratch/July-2018"; // Rahmadi
31 // String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
32 // final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
33 // final String signatureFile = path + "/2018-07/dlink/offSignature1.sig";
35 // List<List<PcapPacket>> signature = PrintUtils.deserializeClustersFromFile(signatureFile);
36 // Layer3ClusterMatcher clusterMatcher = new Layer3ClusterMatcher(signature, null,
37 // (sig, match) -> System.out.println(
38 // String.format("[ !!! SIGNATURE DETECTED AT %s !!! ]",
39 // match.get(0).getTimestamp().atZone(ZoneId.of("America/Los_Angeles")))
45 // handle = Pcaps.openOffline(inputPcapFile, PcapHandle.TimestampPrecision.NANO);
46 // } catch (PcapNativeException pne) {
47 // handle = Pcaps.openOffline(inputPcapFile);
49 // PcapHandleReader reader = new PcapHandleReader(handle, p -> true, clusterMatcher);
50 // reader.readFromHandle();
51 // clusterMatcher.performDetection();
55 * The ordered directions of packets in the sequences that make up {@link #mCluster}.
57 private final Conversation.Direction[] mClusterMemberDirections;
60 * For reassembling the observed traffic into TCP connections.
62 private final TcpReassembler mTcpReassembler = new TcpReassembler();
65 * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
67 private final String mRouterWanIp;
70 * Range-based vs. strict matching.
72 private final boolean mRangeBased;
75 * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
77 private final double mEps;
80 * Create a {@link Layer3ClusterMatcher}.
81 * @param cluster The cluster that traffic is matched against.
82 * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
83 * determining the direction of packets).
84 * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
85 * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
86 * (a subset of) the examined traffic is similar to the traffic that makes up
87 * {@code cluster}, i.e., when the examined traffic is classified as pertaining to
90 public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, boolean isRangeBased, double eps,
91 ClusterMatcherObserver... detectionObservers) {
// The cluster and the matching mode are handed to AbstractClusterMatcher first.
92 super(cluster, isRangeBased);
// A null varargs array is rejected up front; each supplied observer is then visited in turn.
93 Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
94 for (ClusterMatcherObserver obs : detectionObservers) {
97 // Build the cluster members' direction sequence.
98 // Note: assumes that the provided cluster was captured within the local network (routerWanIp is set to null).
// The reference direction pattern comes from the first cluster member only, and always with a null WAN IP,
// independent of the routerWanIp argument.
// NOTE(review): cluster.get(0) throws IndexOutOfBoundsException for an empty cluster - confirm callers never
// pass one.
99 mClusterMemberDirections = getPacketDirections(cluster.get(0), null);
101 * Enforce restriction on cluster members: all representatives must exhibit the same direction pattern and
102 * contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled later
103 * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
104 * in order to ensure correctness, especially during the development/debugging phase.
106 mRangeBased = isRangeBased;
// Strict mode only: Arrays.equals compares both the length and the element-wise directions, so a member with
// a different packet count or a different direction pattern triggers the IllegalArgumentException below.
// NOTE(review): the check iterates mCluster (presumably populated by the superclass constructor) rather than
// the cluster parameter - confirm both hold the same members.
107 if (!mRangeBased) { // Only when it is not range-based
108 if (mCluster.stream().
109 anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
110 throw new IllegalArgumentException(
111 "cluster members must contain the same number of packets and exhibit the same packet direction " +
// NOTE(review): mEps is a final field and eps is a constructor parameter, but no assignment to mEps is
// visible among these lines - confirm it is initialized from eps.
117 mRouterWanIp = routerWanIp;
// Packet callback (the class declares that it implements PacketListener). It only forwards the packet to the
// TCP reassembler; the matching itself runs later over mTcpReassembler.getTcpConversations() in the
// performDetection* methods.
121 public void gotPacket(PcapPacket packet) {
122 // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
123 mTcpReassembler.gotPacket(packet);
127 * Get the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
128 * @return the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
130 public List<List<PcapPacket>> getCluster() {
// Range-based detection over every reassembled TCP conversation: repeatedly searches the conversation for a
// run of packets that the range-based findSubsequenceInSequence overload accepts for the given lower and upper
// bound sequences, and notifies all observers once per hit.
134 public void performDetectionRangeBased() {
136 * Let's start out simple by building a version that only works for signatures that do not span across multiple
137 * TCP conversations...
139 for (Conversation c : mTcpReassembler.getTcpConversations()) {
// For TLS conversations only the application data packets are considered, otherwise all packets.
140 if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
141 // Skip empty conversations.
// The cluster is read as two sequences here: index 0 is used as the lower bounds, index 1 as the upper bounds.
// NOTE(review): get(1) throws IndexOutOfBoundsException when the cluster has fewer than two members, and
// both lookups are loop-invariant - confirm the two-member layout is guaranteed for range-based clusters.
144 List<PcapPacket> lowerBound = mCluster.get(0);
145 List<PcapPacket> upperBound = mCluster.get(1);
146 if (isTlsSequence(lowerBound) != c.isTls() || isTlsSequence(upperBound) != c.isTls()) {
147 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
150 // Fetch set of packets to examine based on TLS or not.
151 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
152 Optional<List<PcapPacket>> match;
// The precomputed cluster directions are passed in; the conversation directions are passed as null so the
// callee derives them from cPkts on every call.
153 while ((match = findSubsequenceInSequence(lowerBound, upperBound, cPkts, mClusterMemberDirections, null)).
155 List<PcapPacket> matchSeq = match.get();
156 // Notify observers about the match.
157 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
159 * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
160 * signature sequence.
// NOTE(review): indexOf returns the first element that equals() the last matched packet. If PcapPacket
// equality is content-based and an equal packet occurs earlier in cPkts, the cut lands before the real end
// of the match - verify, or derive the end index from the bounds of the returned sublist instead.
162 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
163 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
// The stream copy yields a fresh list, so the search continues on a list that is independent of the
// sublist view returned for the previous match.
164 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
// Strict detection over every reassembled TCP conversation: each cluster member is searched for as an exact
// run of packet lengths and directions, and all observers are notified once per hit.
169 public void performDetectionConservative() {
171 * Let's start out simple by building a version that only works for signatures that do not span across multiple
172 * TCP conversations...
174 for (Conversation c : mTcpReassembler.getTcpConversations()) {
175 if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
176 // Skip empty conversations.
179 for (List<PcapPacket> signatureSequence : mCluster) {
180 if (isTlsSequence(signatureSequence) != c.isTls()) {
181 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
184 // Fetch set of packets to examine based on TLS or not.
185 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
187 * Note: we embed the attempt to detect the signature sequence in a loop in order to capture those cases
188 * where the same signature sequence appears multiple times in one Conversation.
190 * Note: since we expect all sequences that together make up the signature to exhibit the same direction
191 * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
192 * won't have to be recomputed internally in each call to findSubsequenceInSequence().
194 Optional<List<PcapPacket>> match;
// The conversation directions are passed as null, so the callee derives them from cPkts on every call.
195 while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null)).
197 List<PcapPacket> matchSeq = match.get();
198 // Notify observers about the match.
199 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
201 * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
202 * signature sequence.
// NOTE(review): indexOf returns the first element that equals() the last matched packet. If PcapPacket
// equality is content-based and an equal packet occurs earlier in cPkts, the cut lands before the real end
// of the match - verify, or derive the end index from the bounds of the returned sublist instead.
204 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
205 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
206 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
212 * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
213 * not manage to capture every single mutation of the sequence during training.
215 * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
216 * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
223 * Checks if {@code sequence} is a sequence of TLS packets. Note: the current implementation relies on inspection
224 * of the port numbers when deciding between TLS vs. non-TLS. Therefore, only the first packet of {@code sequence}
225 * is examined as it is assumed that all packets in {@code sequence} pertain to the same {@link Conversation} and
226 * hence share the same set of two src/dst port numbers (albeit possibly alternating between which one is the src
227 * and which one is the dst, as packets in {@code sequence} may be in alternating directions).
228 * @param sequence The sequence of packets for which it is to be determined if it is a sequence of TLS packets or
230 * @return {@code true} if {@code sequence} is a sequence of TLS packets, {@code false} otherwise.
// Classifies a packet list as TLS when either port of its first packet is a TLS port according to
// TcpConversationUtils.isTlsPort; no other packet of the list is inspected.
// NOTE(review): sequence.get(0) throws IndexOutOfBoundsException for an empty list - callers must not pass one.
232 private boolean isTlsSequence(List<PcapPacket> sequence) {
233 // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
234 PcapPacket firstPkt = sequence.get(0);
235 int srcPort = getSourcePort(firstPkt);
236 int dstPort = getDestinationPort(firstPkt);
237 return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
241 * Examine if a given sequence of packets ({@code sequence}) contains a given shorter sequence of packets
242 * ({@code subsequence}). Note: the current implementation actually searches for a substring as it does not allow
243 * for interleaving packets in {@code sequence} that are not in {@code subsequence}; for example, if
244 * {@code subsequence} consists of packet lengths [2, 3, 5] and {@code sequence} consists of packet lengths
245 * [2, 3, 4, 5], the result will be that there is no match (because of the interleaving 4). If we are to allow
246 * interleaving packets, we need a modified version of
247 * <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
249 * @param subsequence The sequence to search for.
250 * @param sequence The sequence to search.
251 * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
252 * {@code subsequenceDirections[i]} is the direction of the packet returned by
253 * {@code subsequence.get(i)}. May be set to {@code null}, in which case this call will
254 * internally compute the packet directions.
255 * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
256 * {@code sequenceDirections[i]} is the direction of the packet returned by
257 * {@code sequence.get(i)}. May be set to {@code null}, in which case this call will internally
258 * compute the packet directions.
260 * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
261 * {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
// Strict variant: a packet of sequence matches a packet of subsequence only when both the original length
// and the direction are equal.
263 private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
264 List<PcapPacket> sequence,
265 Conversation.Direction[] subsequenceDirections,
266 Conversation.Direction[] sequenceDirections) {
// NOTE(review): an empty subsequence passes this size guard and then fails in isTlsSequence, which reads
// element 0 - confirm callers never pass an empty signature sequence.
267 if (sequence.size() < subsequence.size()) {
268 // If subsequence is longer, it cannot be contained in sequence.
269 return Optional.empty();
// TLS-ness of each list is decided from the ports of its first packet only (see isTlsSequence).
271 if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
272 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
273 return Optional.empty();
275 // If packet directions have not been precomputed by calling code, we need to construct them.
// Both fallbacks use mRouterWanIp, whereas the constructor computes the cluster directions with a null WAN IP.
276 if (subsequenceDirections == null) {
277 subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
279 if (sequenceDirections == null) {
280 sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
284 while (seqIdx < sequence.size()) {
285 PcapPacket subseqPkt = subsequence.get(subseqIdx);
286 PcapPacket seqPkt = sequence.get(seqIdx);
287 // We only have a match if packet lengths and directions match.
288 if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
289 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
290 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
293 if (subseqIdx == subsequence.size()) {
294 // We managed to match the entire subsequence in sequence.
295 // Return the sublist of sequence that matches subsequence.
298 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
// subList returns a view backed by sequence rather than a copy, which is why the warning above matters.
301 return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
307 * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
308 * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
309 * leave seqIdx untouched.
314 * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
315 * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
// Reached when the end of sequence is hit without completing a match.
322 return Optional.empty();
326 * Overloading the method {@code findSubsequenceInSequence} for range-based matching. Instead of a sequence,
327 * we have sequences of lower and upper bounds.
329 * @param lowerBound The lower bound of the sequence we search for.
330 * @param upperBound The upper bound of the sequence we search for.
331 * @param subsequenceDirections The directions of packets in {@code lowerBound} such that for all {@code i},
332 * {@code subsequenceDirections[i]} is the direction of the packet returned by
333 * {@code lowerBound.get(i)}. May be set to {@code null}, in which case this call will
334 * internally compute the packet directions.
335 * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
336 * {@code sequenceDirections[i]} is the direction of the packet returned by
337 * {@code sequence.get(i)}. May be set to {@code null}, in which case this call will internally
338 * compute the packet directions.
340 * @return An {@link Optional} containing the part of {@code sequence} that lies within the range spanned by
341 * {@code lowerBound} and {@code upperBound}, or an empty {@link Optional} if no part of {@code sequence} does.
// Range-based variant: a packet of sequence matches position i when its original length lies within
// [lowerBound[i] - eps, upperBound[i] + eps] and its direction equals subsequenceDirections[i].
343 private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
344 List<PcapPacket> upperBound,
345 List<PcapPacket> sequence,
346 Conversation.Direction[] subsequenceDirections,
347 Conversation.Direction[] sequenceDirections) {
348 // Just do the checks for either lower or upper bound!
349 // TODO: For now we use just the lower bound
// NOTE(review): only lowerBound is validated here. upperBound is indexed with the same position further down,
// so a shorter upperBound fails with IndexOutOfBoundsException - confirm both bounds always have equal size.
350 if (sequence.size() < lowerBound.size()) {
351 // If subsequence is longer, it cannot be contained in sequence.
352 return Optional.empty();
354 if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
355 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
356 return Optional.empty();
358 // If packet directions have not been precomputed by calling code, we need to construct them.
359 if (subsequenceDirections == null) {
360 subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
362 if (sequenceDirections == null) {
363 sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
367 while (seqIdx < sequence.size()) {
368 PcapPacket lowBndPkt = lowerBound.get(subseqIdx);
369 PcapPacket upBndPkt = upperBound.get(subseqIdx);
370 PcapPacket seqPkt = sequence.get(seqIdx);
371 // We only have a match if packet lengths and directions match.
372 // The packet lengths have to be in the range of [lowerBound - eps, upperBound+eps]
373 // TODO: Maybe we could do better here for the double to integer conversion?
// The (int) cast truncates mEps toward zero, so any fractional part of eps is dropped on both sides.
// NOTE(review): the bounds are taken from length() while the candidate packet is compared through
// getOriginalLength(); the strict overload uses getOriginalLength() on both sides. The two values can differ
// for packets that were truncated at capture time - confirm the mix is intended.
374 int epsLowerBound = lowBndPkt.length() - (int) mEps;
375 int epsUpperBound = upBndPkt.length() + (int) mEps;
376 if (epsLowerBound <= seqPkt.getOriginalLength() &&
377 seqPkt.getOriginalLength() <= epsUpperBound &&
378 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
379 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
382 if (subseqIdx == lowerBound.size()) {
383 // We managed to match the entire subsequence in sequence.
384 // Return the sublist of sequence that matches subsequence.
387 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
// subList returns a view backed by sequence rather than a copy, which is why the warning above matters.
390 return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
396 * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
397 * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
398 * leave seqIdx untouched.
403 * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
404 * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
// Reached when the end of sequence is hit without completing a match.
411 return Optional.empty();
415 * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
416 * members. Two cluster members are considered identical if their packet lengths and packet directions are
417 * identical. The resulting pruned cluster is unmodifiable (this applies to both the outermost list as well as the
418 * nested lists) in order to preserve its integrity when exposed to external code (e.g., through
419 * {@link #getCluster()}).
421 * @param cluster A cluster to prune.
422 * @return The resulting pruned cluster.
425 protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
// Members are kept in their original order; a member is dropped when the strict search finds it inside a
// member that was already kept.
426 List<List<PcapPacket>> prunedCluster = new ArrayList<>();
427 for (List<PcapPacket> originalClusterSeq : cluster) {
428 boolean alreadyPresent = false;
429 for (List<PcapPacket> prunedClusterSeq : prunedCluster) {
// NOTE(review): findSubsequenceInSequence is a containment search (it only rejects when the searched list is
// shorter than the pattern), so a member that is a strict run inside a longer kept member also counts as a
// duplicate, although the Javadoc speaks of identical members - confirm all members have equal length.
// The same direction array is passed for both lists; the callee recomputes any direction array that is null.
430 Optional<List<PcapPacket>> duplicate = findSubsequenceInSequence(originalClusterSeq, prunedClusterSeq,
431 mClusterMemberDirections, mClusterMemberDirections);
432 if (duplicate.isPresent()) {
433 alreadyPresent = true;
437 if (!alreadyPresent) {
// The wrapper is an unmodifiable view of the caller-supplied list, not a copy.
438 prunedCluster.add(Collections.unmodifiableList(originalClusterSeq));
441 return Collections.unmodifiableList(prunedCluster);
445 * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
446 * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
447 * index in the input list.
448 * @param packets The list of packets for which to construct a corresponding array of packet directions.
449 * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
450 * the traffic is captured just outside the local network (at the ISP side of the router). Set to
451 * {@code null} if {@code packets} stem from traffic captured within the local network.
452 * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
453 * corresponding index in {@code packets}.
455 private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
456 Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
457 for (int i = 0; i < packets.size(); i++) {
458 PcapPacket pkt = packets.get(i);
459 if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
460 // Sanity check: we shouldn't be processing loopback traffic
461 throw new AssertionError("loopback traffic detected");
463 if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
464 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
465 } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
466 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
468 //throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");