1 package edu.uci.iotproject.detection.layer2;
3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
4 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
5 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
6 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
7 import edu.uci.iotproject.detection.AbstractClusterMatcher;
8 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
9 import org.pcap4j.core.*;
11 import java.util.ArrayList;
12 import java.util.HashMap;
13 import java.util.List;
15 import java.util.function.Function;
18 * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
20 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
21 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
23 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
26 * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
27 * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
28 * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
30 private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
31 private final Map<Layer2Flow, List<Layer2RangeMatcher>> mPerFlowRangeMatcher = new HashMap<>();
33 private final Function<Layer2Flow, Boolean> mFlowFilter;
36 * Specifying range-based instead of conservative exact matching.
38 private final boolean mRangeBased;
41 * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
43 private final double mEps;
45 private int mInclusionTimeMillis;
48 * Keeping track of maximum number of skipped packets
50 private int mMaxSkippedPackets;
51 // private List<Integer> mMaxSkippedPackets;
53 private int mLimitSkippedPackets;
56 * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
57 * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
59 public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, int inclusionTimeMillis,
60 boolean isRangeBased, double eps, int limitSkippedPackets) {
61 // Consider all flows if no flow filter specified.
62 this(cluster, flow -> true, inclusionTimeMillis, isRangeBased, eps, limitSkippedPackets);
66 * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
67 * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
68 * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
69 * search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
70 * will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
71 * namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
72 * the new flow. This functionality may for example come in handy when one only wants to search
73 * for matches in the subset of flows that involves a specific (range of) MAC(s).
74 * @param inclusionTimeMillis Packet inclusion time limit for matching.
75 * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
76 * @param eps The epsilon value used in the DBSCAN algorithm.
78 public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
79 int inclusionTimeMillis, boolean isRangeBased, double eps, int limitSkippedPackets) {
80 super(cluster, isRangeBased);
81 mFlowFilter = flowFilter;
82 mRangeBased = isRangeBased;
84 mInclusionTimeMillis =
85 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
86 mMaxSkippedPackets = 0;
87 // mMaxSkippedPackets = new ArrayList<>();
88 // Give integer's MAX_VALUE if -1
89 mLimitSkippedPackets = limitSkippedPackets == -1 ? Integer.MAX_VALUE : limitSkippedPackets;
93 public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
95 rangeBasedMatching(flow, newPacket);
97 conservativeMatching(flow, newPacket);
101 private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
102 if (mPerFlowSeqMatchers.get(flow) == null) {
103 // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
104 // All sequences of the cluster have the same length, so we only need to compute the length of the nested
105 // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
106 // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
107 // around), so the length of the inner array is simply the sequence length.
108 Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
109 // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
110 for (int i = 0; i < matchers.length; i++) {
111 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis);
113 // Associate the new sequence matcher table with the new flow
114 mPerFlowSeqMatchers.put(flow, matchers);
116 // Fetch table that contains sequence matchers for this flow.
117 Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
118 // Present the packet to all sequence matchers.
119 for (int i = 0; i < matchers.length; i++) {
120 // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
121 // the sequence matchers that have advanced the most in the special case where the searched sequence
122 // contains two packets of the same length going in the same direction.
123 for (int j = matchers[i].length - 1; j >= 0 ; j--) {
124 Layer2SequenceMatcher sm = matchers[i][j];
126 // There is currently no sequence matcher that has managed to match j packets.
129 boolean matched = sm.matchPacket(newPacket);
131 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
132 // Update maximum skipped packets
133 boolean stillMatch = checkMaxSkippedPackets(flow.getPackets(), sm.getMatchedPackets());
135 // Sequence matcher has a match. Report it to observers.
136 mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
138 // Remove the now terminated sequence matcher.
139 matchers[i][j] = null;
141 // Sequence matcher advanced one step, so move it to its corresponding new position iff the
142 // packet that advanced it has a later timestamp than that of the last matched packet of the
143 // sequence matcher at the new index, if any. In most traces, a small amount of the packets
144 // appear out of order (with regards to their timestamp), which is why this check is required.
145 // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
147 if (matchers[i][j+1] == null ||
148 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
149 matchers[i][j+1] = sm;
152 // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
153 // from state zero completed its matching or if it replaced a different one in state 1 or not.
154 if (sm.getMatchedPacketsCount() == 1) {
155 matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis);
162 // Update the maximum number of skipped packets
163 private boolean checkMaxSkippedPackets(List<PcapPacket> flowPackets, List<PcapPacket> matchedPackets) {
164 // Count number of skipped packets by looking into
165 // the difference of indices of two matched packets
166 boolean stillMatch = true;
167 for(int i = 1; i < matchedPackets.size(); ++i) {
168 int currIndex = flowPackets.indexOf(matchedPackets.get(i-1));
169 int nextIndex = flowPackets.indexOf(matchedPackets.get(i));
170 int skippedPackets = nextIndex - currIndex;
171 if (mMaxSkippedPackets < skippedPackets) {
172 mMaxSkippedPackets = skippedPackets;
175 // mMaxSkippedPackets.add(skippedPackets);
180 private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
181 // TODO: For range-based matching, we need to create a new matcher every time we see the first element of
182 // the sequence (between lower and upper bounds).
183 if (mPerFlowRangeMatcher.get(flow) == null) {
184 // If this is the first time we encounter this flow, we need to set up a list of sequence matchers.
185 List<Layer2RangeMatcher> listMatchers = new ArrayList<>();
186 // Prepare a "state 0" sequence matcher.
187 Layer2RangeMatcher matcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
188 mInclusionTimeMillis, mEps);
189 listMatchers.add(matcher);
190 // Associate the new sequence matcher table with the new flow.
191 mPerFlowRangeMatcher.put(flow, listMatchers);
193 // Fetch table that contains sequence matchers for this flow.
194 List<Layer2RangeMatcher> listMatchers = mPerFlowRangeMatcher.get(flow);
195 // Add a new matcher if all matchers have already advanced to the next stage.
196 // We always need a new matcher to match from NO packets.
197 boolean addOneArray = true;
198 for(Layer2RangeMatcher matcher : listMatchers) {
199 if (matcher.getMatchedPacketsCount() == 0) {
203 // Add the new matcher into the list
205 Layer2RangeMatcher newMatcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
206 mInclusionTimeMillis, mEps);
207 listMatchers.add(newMatcher);
209 // Present packet to the sequence matchers.
210 // Make a shallow copy of the list so that we can clean up the actual list when a matcher is terminated.
211 // Otherwise, we would get an exception for changing the list while iterating on it.
212 List<Layer2RangeMatcher> listMatchersCopy = new ArrayList<>(listMatchers);
213 for(Layer2RangeMatcher matcher : listMatchersCopy) {
214 Layer2RangeMatcher sm = matcher;
215 // Check if no packets are matched yet or if there are matched packets, the next packet to be matched
216 // has to be later than the last matched packet.
217 // In most traces, a small amount of the packets appear out of order (with regards to their timestamp),
218 // which is why this check is required.
219 // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
221 if (sm.getMatchedPacketsCount() == 0 ||
222 newPacket.getTimestamp().isAfter(sm.getLastPacket().getTimestamp())) {
223 boolean matched = sm.matchPacket(newPacket);
225 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
226 // Update maximum skipped packets
227 boolean stillMatch = checkMaxSkippedPackets(flow.getPackets(), sm.getMatchedPackets());
229 // Sequence matcher has a match. Report it to observers.
230 mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
232 // Terminate sequence matcher since matching is complete.
233 listMatchers.remove(matcher);
241 protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
242 // Note: we assume that all sequences in the input cluster are of the same length and that their packet
243 // directions are identical.
244 List<List<PcapPacket>> prunedCluster = new ArrayList<>();
245 for (List<PcapPacket> originalClusterSeq : cluster) {
246 boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
247 for (int i = 0; i < pcPkts.size(); i++) {
248 if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
254 if (!alreadyPresent) {
255 // Add the sequence if not already present in the pruned cluster.
256 prunedCluster.add(originalClusterSeq);
259 return prunedCluster;
262 private static final boolean DEBUG = false;
265 public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
266 // New flow detected. Check if we should consider it when searching for cluster member matches.
267 if (mFlowFilter.apply(newFlow)) {
269 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
271 // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
272 newFlow.addFlowObserver(this);
274 System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");
279 * Return the maximum number of skipped packets.
281 public int getMaxSkippedPackets() {
282 return mMaxSkippedPackets;
284 // public List<Integer> getMaxSkippedPackets() {
285 // return mMaxSkippedPackets;