Programming Languages Research Group: Git - pingpong.git/blob

   1 package edu.uci.iotproject.detection.layer2;
   2
   3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
   4 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
   5 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
   6 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
   7 import edu.uci.iotproject.detection.AbstractClusterMatcher;
   8 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
   9 import org.pcap4j.core.*;
  10
  11 import java.util.ArrayList;
  12 import java.util.HashMap;
  13 import java.util.List;
  14 import java.util.Map;
  15 import java.util.function.Function;
  16
  17 /**
  18  * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
  19  *
  20  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
  21  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
  22  */
  23 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
  24
  25     /**
  26      * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
  27      * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
  28      * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
  29      */
  30     private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
  31     private final Map<Layer2Flow, Layer2RangeMatcher[]> mPerFlowRangeMatcher = new HashMap<>();
  32
  33     private final Function<Layer2Flow, Boolean> mFlowFilter;
  34
  35     /**
  36      * Specifying range-based instead of conservative exact matching.
  37      */
  38     private final boolean mRangeBased;
  39
  40     /**
  41      * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
  42      */
  43     private final double mEps;
  44
  45     private int mInclusionTimeMillis;
  46
  47     /**
  48      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
  49      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
  50      */
  51     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, int inclusionTimeMillis,
  52                                 boolean isRangeBased, double eps) {
  53         // Consider all flows if no flow filter specified.
  54         this(cluster, flow -> true, inclusionTimeMillis, isRangeBased, eps);
  55     }
  56
  57     /**
  58      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
  59      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
  60      * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
  61      *                   search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
  62      *                   will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
  63      *                   namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
  64      *                   the new flow. This functionality may for example come in handy when one only wants to search
  65      *                   for matches in the subset of flows that involves a specific (range of) MAC(s).
  66      * @param inclusionTimeMillis Packet inclusion limit for matching.
  67      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
  68      * @param eps The epsilon value used in the DBSCAN algorithm.
  69      */
  70     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
  71                                 int inclusionTimeMillis, boolean isRangeBased, double eps) {
  72         super(cluster, isRangeBased);
  73         mFlowFilter = flowFilter;
  74         mRangeBased = isRangeBased;
  75         mEps = eps;
  76         mInclusionTimeMillis =
  77                 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
  78     }
  79
  80     @Override
  81     public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
  82         if (mRangeBased) {
  83             rangeBasedMatching(flow, newPacket);
  84         } else {
  85             conservativeMatching(flow, newPacket);
  86         }
  87     }
  88
  89     private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
  90         if (mPerFlowSeqMatchers.get(flow) == null) {
  91             // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
  92             // All sequences of the cluster have the same length, so we only need to compute the length of the nested
  93             // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
  94             // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
  95             // around), so the length of the inner array is simply the sequence length.
  96             Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
  97             // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
  98             for (int i = 0; i < matchers.length; i++) {
  99                 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis);
 100             }
 101             // Associate the new sequence matcher table with the new flow
 102             mPerFlowSeqMatchers.put(flow, matchers);
 103         }
 104         // Fetch table that contains sequence matchers for this flow.
 105         Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
 106         // Present the packet to all sequence matchers.
 107         for (int i = 0; i < matchers.length; i++) {
 108             // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
 109             // the sequence matchers that have advanced the most in the special case where the searched sequence
 110             // contains two packets of the same length going in the same direction.
 111             for (int j = matchers[i].length - 1; j >= 0 ; j--) {
 112                 Layer2SequenceMatcher sm = matchers[i][j];
 113                 if (sm == null) {
 114                     // There is currently no sequence matcher that has managed to match j packets.
 115                     continue;
 116                 }
 117                 boolean matched = sm.matchPacket(newPacket);
 118                 if (matched) {
 119                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 120                         // Sequence matcher has a match. Report it to observers.
 121                         mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 122                         // Remove the now terminated sequence matcher.
 123                         matchers[i][j] = null;
 124                     } else {
 125                         // Sequence matcher advanced one step, so move it to its corresponding new position iff the
 126                         // packet that advanced it has a later timestamp than that of the last matched packet of the
 127                         // sequence matcher at the new index, if any. In most traces, a small amount of the packets
 128                         // appear out of order (with regards to their timestamp), which is why this check is required.
 129                         // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 130                         // order here.
 131                         if (matchers[i][j+1] == null ||
 132                                 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
 133                             matchers[i][j+1] = sm;
 134                         }
 135                     }
 136                     // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
 137                     // from state zero completed its matching or if it replaced a different one in state 1 or not.
 138                     if (sm.getMatchedPacketsCount() == 1) {
 139                         matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis);
 140                     }
 141                 }
 142             }
 143         }
 144     }
 145
 146     private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
 147         // TODO: For range-based matching, we only care about matching a range; therefore it is a matcher array.
 148         if (mPerFlowRangeMatcher.get(flow) == null) {
 149             // If this is the first time we encounter this flow, we need to set up a sequence matcher.
 150             // All sequences of the cluster have the same length, so we only need to compute the length of the
 151             // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
 152             // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
 153             // around), so the length of the array is simply the sequence length.
 154             Layer2RangeMatcher[] matcher = new Layer2RangeMatcher[mCluster.get(0).size()];
 155             // Prepare a "state 0" sequence matcher.
 156             matcher[0] = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1), mInclusionTimeMillis, mEps);
 157             // Associate the new sequence matcher table with the new flow.
 158             mPerFlowRangeMatcher.put(flow, matcher);
 159         }
 160         // Fetch table that contains sequence matchers for this flow.
 161         Layer2RangeMatcher[] matcher = mPerFlowRangeMatcher.get(flow);
 162         // Present packet to the sequence matcher.
 163         for (int j = matcher.length - 1; j >= 0; j--) {
 164             Layer2RangeMatcher sm = matcher[j];
 165             if (sm == null) {
 166                 // There is currently no sequence matcher that has managed to match j packets.
 167                 continue;
 168             }
 169             boolean matched = sm.matchPacket(newPacket);
 170             if (matched) {
 171                 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 172                     // Sequence matcher has a match. Report it to observers.
 173                     mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 174                     // Remove the now terminated sequence matcher.
 175                     matcher[j] = null;
 176                 } else {
 177                     // Sequence matcher advanced one step, so move it to its corresponding new position iff the
 178                     // packet that advanced it has a later timestamp than that of the last matched packet of the
 179                     // sequence matcher at the new index, if any. In most traces, a small amount of the packets
 180                     // appear out of order (with regards to their timestamp), which is why this check is required.
 181                     // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 182                     // order here.
 183                     if (matcher[j+1] == null ||
 184                             newPacket.getTimestamp().isAfter(matcher[j+1].getLastPacket().getTimestamp())) {
 185                         matcher[j+1] = sm;
 186                     }
 187                 }
 188                 // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
 189                 // from state zero completed its matching or if it replaced a different one in state 1 or not.
 190                 if (sm.getMatchedPacketsCount() == 1) {
 191                     matcher[j] = new Layer2RangeMatcher(sm.getTargetLowerBound(), sm.getTargetUpperBound(),
 192                             mInclusionTimeMillis, mEps);
 193                 }
 194             }
 195         }
 196     }
 197
 198     @Override
 199     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
 200         // Note: we assume that all sequences in the input cluster are of the same length and that their packet
 201         // directions are identical.
 202         List<List<PcapPacket>> prunedCluster = new ArrayList<>();
 203         for (List<PcapPacket> originalClusterSeq : cluster) {
 204             boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
 205                 for (int i = 0; i < pcPkts.size(); i++) {
 206                     if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
 207                         return false;
 208                     }
 209                 }
 210                 return true;
 211             });
 212             if (!alreadyPresent) {
 213                 // Add the sequence if not already present in the pruned cluster.
 214                 prunedCluster.add(originalClusterSeq);
 215             }
 216         }
 217         return prunedCluster;
 218     }
 219
 220     private static final boolean DEBUG = false;
 221
 222     @Override
 223     public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
 224         // New flow detected. Check if we should consider it when searching for cluster member matches.
 225         if (mFlowFilter.apply(newFlow)) {
 226             if (DEBUG) {
 227                 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
 228             }
 229             // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
 230             newFlow.addFlowObserver(this);
 231         } else if (DEBUG) {
 232             System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");
 233         }
 234     }
 235 }