1 package edu.uci.iotproject.detection.layer2;
3 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
4 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
5 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
6 import edu.uci.iotproject.detection.AbstractClusterMatcher;
7 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
8 import org.pcap4j.core.*;
10 import java.util.ArrayList;
11 import java.util.HashMap;
12 import java.util.List;
14 import java.util.function.Function;
17 * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
19 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
20 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
22 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
25 * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
26 * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
27 * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
29 private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
30 private final Map<Layer2Flow, Layer2RangeMatcher[]> mPerFlowRangeMatcher = new HashMap<>();
32 private final Function<Layer2Flow, Boolean> mFlowFilter;
35 * Specifying range-based instead of conservative exact matching.
37 private final boolean mRangeBased;
40 * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
42 private final double mEps;
45 * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
46 * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
48 public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, boolean isRangeBased, double eps) {
49 // Consider all flows if no flow filter specified.
50 this(cluster, flow -> true, isRangeBased, eps);
54 * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
55 * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
56 * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
57 * search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
58 * will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
59 * namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
60 * the new flow. This functionality may for example come in handy when one only wants to search
61 * for matches in the subset of flows that involves a specific (range of) MAC(s).
62 * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
63 * @param eps The epsilon value used in the DBSCAN algorithm.
65 public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
66 boolean isRangeBased, double eps) {
67 super(cluster, isRangeBased);
68 mFlowFilter = flowFilter;
69 mRangeBased = isRangeBased;
74 public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
76 rangeBasedMatching(flow, newPacket);
78 conservativeMatching(flow, newPacket);
82 private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
83 if (mPerFlowSeqMatchers.get(flow) == null) {
84 // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
85 // All sequences of the cluster have the same length, so we only need to compute the length of the nested
86 // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
87 // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
88 // around), so the length of the inner array is simply the sequence length.
89 Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
90 // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
91 for (int i = 0; i < matchers.length; i++) {
92 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i));
94 // Associate the new sequence matcher table with the new flow
95 mPerFlowSeqMatchers.put(flow, matchers);
97 // Fetch table that contains sequence matchers for this flow.
98 Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
99 // Present the packet to all sequence matchers.
100 for (int i = 0; i < matchers.length; i++) {
101 // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
102 // the sequence matchers that have advanced the most in the special case where the searched sequence
103 // contains two packets of the same length going in the same direction.
104 for (int j = matchers[i].length - 1; j >= 0 ; j--) {
105 Layer2SequenceMatcher sm = matchers[i][j];
107 // There is currently no sequence matcher that has managed to match j packets.
110 boolean matched = sm.matchPacket(newPacket);
112 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
113 // Sequence matcher has a match. Report it to observers.
114 mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
115 // Remove the now terminated sequence matcher.
116 matchers[i][j] = null;
118 // Sequence matcher advanced one step, so move it to its corresponding new position iff the
119 // packet that advanced it has a later timestamp than that of the last matched packet of the
120 // sequence matcher at the new index, if any. In most traces, a small amount of the packets
121 // appear out of order (with regards to their timestamp), which is why this check is required.
122 // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
124 if (matchers[i][j+1] == null ||
125 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
126 matchers[i][j+1] = sm;
129 // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
130 // from state zero completed its matching or if it replaced a different one in state 1 or not.
131 if (sm.getMatchedPacketsCount() == 1) {
132 matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence());
139 private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
140 // TODO: For range-based matching, we only care about matching a range; therefore it is a matcher array.
141 if (mPerFlowRangeMatcher.get(flow) == null) {
142 // If this is the first time we encounter this flow, we need to set up a sequence matcher.
143 // All sequences of the cluster have the same length, so we only need to compute the length of the
144 // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
145 // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
146 // around), so the length of the array is simply the sequence length.
147 Layer2RangeMatcher[] matcher = new Layer2RangeMatcher[mCluster.get(0).size()];
148 // Prepare a "state 0" sequence matcher.
149 matcher[0] = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1), mEps);
150 // Associate the new sequence matcher table with the new flow.
151 mPerFlowRangeMatcher.put(flow, matcher);
153 // Fetch table that contains sequence matchers for this flow.
154 Layer2RangeMatcher[] matcher = mPerFlowRangeMatcher.get(flow);
155 // Present packet to the sequence matcher.
156 for (int j = matcher.length - 1; j >= 0; j--) {
157 Layer2RangeMatcher sm = matcher[j];
159 // There is currently no sequence matcher that has managed to match j packets.
162 boolean matched = sm.matchPacket(newPacket);
164 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
165 // Sequence matcher has a match. Report it to observers.
166 mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
167 // Remove the now terminated sequence matcher.
170 // Sequence matcher advanced one step, so move it to its corresponding new position iff the
171 // packet that advanced it has a later timestamp than that of the last matched packet of the
172 // sequence matcher at the new index, if any. In most traces, a small amount of the packets
173 // appear out of order (with regards to their timestamp), which is why this check is required.
174 // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
176 if (matcher[j+1] == null ||
177 newPacket.getTimestamp().isAfter(matcher[j+1].getLastPacket().getTimestamp())) {
181 // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
182 // from state zero completed its matching or if it replaced a different one in state 1 or not.
183 if (sm.getMatchedPacketsCount() == 1) {
184 matcher[j] = new Layer2RangeMatcher(sm.getTargetLowerBound(), sm.getTargetUpperBound(), mEps);
191 protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
192 // Note: we assume that all sequences in the input cluster are of the same length and that their packet
193 // directions are identical.
194 List<List<PcapPacket>> prunedCluster = new ArrayList<>();
195 for (List<PcapPacket> originalClusterSeq : cluster) {
196 boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
197 for (int i = 0; i < pcPkts.size(); i++) {
198 if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
204 if (!alreadyPresent) {
205 // Add the sequence if not already present in the pruned cluster.
206 prunedCluster.add(originalClusterSeq);
209 return prunedCluster;
212 private static final boolean DEBUG = false;
215 public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
216 // New flow detected. Check if we should consider it when searching for cluster member matches.
217 if (mFlowFilter.apply(newFlow)) {
219 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
221 // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
222 newFlow.addFlowObserver(this);
224 System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");