1 package edu.uci.iotproject;
3 import static edu.uci.iotproject.analysis.UserAction.Type;
5 import edu.uci.iotproject.analysis.*;
6 import edu.uci.iotproject.io.TriggerTimesFileReader;
7 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
8 import edu.uci.iotproject.trafficreassembly.layer3.TcpReassembler;
9 import edu.uci.iotproject.util.PcapPacketUtils;
10 import edu.uci.iotproject.util.PrintUtils;
11 import org.apache.commons.math3.stat.clustering.Cluster;
12 import org.apache.commons.math3.stat.clustering.DBSCANClusterer;
13 import org.pcap4j.core.*;
14 import org.pcap4j.packet.namednumber.DataLinkType;
16 import java.io.EOFException;
17 import java.net.UnknownHostException;
18 import java.time.Duration;
19 import java.time.Instant;
21 import java.util.concurrent.TimeoutException;
22 import java.util.stream.Collectors;
23 import java.util.stream.Stream;
26 * This is a system that reads PCAP files to compare
27 * patterns of DNS hostnames, packet sequences, and packet
28 * lengths with training data to determine certain events
29 * or actions for smart home devices.
31 * @author Janus Varmarken
32 * @author Rahmadi Trimananda (rtrimana@uci.edu)
35 public class SignatureGenerator {
38 public static void main(String[] args) throws PcapNativeException, NotOpenException, EOFException, TimeoutException, UnknownHostException {
39 // -------------------------------------------------------------------------------------------------------------
40 // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------
41 if (args.length < 11) {
42 String errMsg = String.format("Usage: %s inputPcapFile outputPcapFile triggerTimesFile deviceIp" +
43 " onSignatureFile offSignatureFile onClusterAnalysisFile offClusterAnalysisFile epsilon" +
44 " deletedSequencesOn deletedSequencesOff" +
45 "\n inputPcapFile: the target of the detection" +
46 "\n outputPcapFile: the processed PCAP file through 15-second window filtering" +
47 "\n triggerTimesFile: the trigger timestamps" +
48 "\n deviceIp: the IP address of the device we want to generate a signature for" +
49 "\n onSignatureFile: name of the ON signature file" +
50 "\n offSignatureFile: name of the OFF signature file" +
51 "\n onClusterAnalysisFile: name of the ON signature cluster analysis file" +
52 "\n offClusterAnalysisFile: name of the OFF signature cluster analysis file" +
53 "\n epsilon: epsilon value of the DBSCAN algorithm" +
54 "\n deletedSequencesOn: sequences to be deleted from the final ON signature" +
55 " (please separate with commas, e.g., 0,1,2, or put '-1' if not needed)" +
56 "\n deletedSequencesOff: sequences to be deleted from the final OFF signature" +
57 " (please separate with commas, e.g., 0,1,2, or put '-1' if not needed)",
58 SignatureGenerator.class.getSimpleName());
59 System.out.println(errMsg);
62 boolean verbose = true;
63 final String inputPcapFile = args[0];
64 final String outputPcapFile = args[1];
65 final String triggerTimesFile = args[2];
66 final String deviceIp = args[3];
67 final String onSignatureFile = args[4];
68 final String offSignatureFile = args[5];
69 final String onClusterAnalysisFile = args[6];
70 final String offClusterAnalysisFile = args[7];
71 final double eps = Double.parseDouble(args[8]);
72 final String deletedSequencesOn = args[9];
73 final String deletedSequencesOff = args[10];
75 // =========================================== TRAFFIC FILTERING ============================================
77 TriggerTimesFileReader ttfr = new TriggerTimesFileReader();
78 List<Instant> triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false);
79 // Tag each trigger with "ON" or "OFF", assuming that the first trigger is an "ON" and that they alternate.
80 List<UserAction> userActions = new ArrayList<>();
81 for (int i = 0; i < triggerTimes.size(); i++) {
82 userActions.add(new UserAction(i % 2 == 0 ? Type.TOGGLE_ON : Type.TOGGLE_OFF, triggerTimes.get(i)));
84 TriggerTrafficExtractor tte = new TriggerTrafficExtractor(inputPcapFile, triggerTimes, deviceIp);
85 final PcapDumper outputter = Pcaps.openDead(DataLinkType.EN10MB, 65536).dumpOpen(outputPcapFile);
86 DnsMap dnsMap = new DnsMap();
87 TcpReassembler tcpReassembler = new TcpReassembler();
88 TrafficLabeler trafficLabeler = new TrafficLabeler(userActions);
89 tte.performExtraction(pkt -> {
92 } catch (NotOpenException e) {
95 }, dnsMap, tcpReassembler, trafficLabeler);
99 if (tte.getPacketsIncludedCount() != trafficLabeler.getTotalPacketCount()) {
100 // Sanity/debug check
101 throw new AssertionError(String.format("mismatch between packet count in %s and %s",
102 TriggerTrafficExtractor.class.getSimpleName(), TrafficLabeler.class.getSimpleName()));
105 // Extract all conversations present in the filtered trace.
106 List<Conversation> allConversations = tcpReassembler.getTcpConversations();
107 // Group conversations by hostname.
108 Map<String, List<Conversation>> convsByHostname =
109 TcpConversationUtils.groupConversationsByHostname(allConversations, dnsMap);
110 System.out.println("Grouped conversations by hostname.");
111 // For each hostname, count the frequencies of packet lengths exchanged with that hostname.
112 final Map<String, Map<Integer, Integer>> pktLenFreqsByHostname = new HashMap<>();
113 convsByHostname.forEach((host, convs) -> pktLenFreqsByHostname.put(host,
114 TcpConversationUtils.countPacketLengthFrequencies(convs)));
115 System.out.println("Counted frequencies of packet lengths exchanged with each hostname.");
116 // For each hostname, count the frequencies of packet sequences (i.e., count how many
117 // conversations exchange a sequence of packets of some specific lengths).
118 final Map<String, Map<String, Integer>> pktSeqFreqsByHostname = new HashMap<>();
119 convsByHostname.forEach((host, convs) -> pktSeqFreqsByHostname.put(host,
120 TcpConversationUtils.countPacketSequenceFrequencies(convs)));
121 System.out.println("Counted frequencies of packet sequences exchanged with each hostname.");
122 // For each hostname, count frequencies of packet pairs exchanged
123 // with that hostname across all conversations
124 final Map<String, Map<String, Integer>> pktPairFreqsByHostname =
125 TcpConversationUtils.countPacketPairFrequenciesByHostname(allConversations, dnsMap);
126 System.out.println("Counted frequencies of packet pairs per hostname");
127 // For each user action, reassemble the set of TCP connections occurring shortly after it.
128 final Map<UserAction, List<Conversation>> userActionToConversations =
129 trafficLabeler.getLabeledReassembledTcpTraffic();
130 final Map<UserAction, Map<String, List<Conversation>>> userActionsToConvsByHostname =
131 trafficLabeler.getLabeledReassembledTcpTraffic(dnsMap);
132 System.out.println("Reassembled TCP conversations occurring shortly after each user event");
135 * NOTE: no need to generate these more complex on/off maps that also contain mappings from hostname and
136 * sequence identifiers as we do not care about hostnames and sequences during clustering.
137 * We can simply use the UserAction->List<Conversation> map to generate ON/OFF groupings of conversations.
139 // Contains all ON events: hostname -> sequence identifier -> list of conversations with that sequence
140 Map<String, Map<String, List<Conversation>>> ons = new HashMap<>();
141 // Contains all OFF events: hostname -> sequence identifier -> list of conversations with that sequence
142 Map<String, Map<String, List<Conversation>>> offs = new HashMap<>();
143 userActionsToConvsByHostname.forEach((ua, hostnameToConvs) -> {
144 Map<String, Map<String, List<Conversation>>> outer = ua.getType() == Type.TOGGLE_ON ? ons : offs;
145 hostnameToConvs.forEach((host, convs) -> {
146 Map<String, List<Conversation>> seqsToConvs = TcpConversationUtils.
147 groupConversationsByPacketSequence(convs, verbose);
148 outer.merge(host, seqsToConvs, (oldMap, newMap) -> {
149 newMap.forEach((sequence, cs) -> oldMap.merge(sequence, cs, (list1, list2) -> {
158 // ============================================== PAIR CLUSTERING ============================================
159 // TODO: No need to use the more convoluted on/off maps; Can simply use the UserAction->List<Conversation> map
160 // TODO: when don't care about hostnames and sequences (see comment earlier).
161 // ===========================================================================================================
162 List<Conversation> onConversations = userActionToConversations.entrySet().stream().
163 filter(e -> e.getKey().getType() == Type.TOGGLE_ON). // drop all OFF events from stream
164 map(e -> e.getValue()). // no longer interested in the UserActions
165 flatMap(List::stream). // flatten List<List<T>> to a List<T>
166 collect(Collectors.toList());
167 List<Conversation> offConversations = userActionToConversations.entrySet().stream().
168 filter(e -> e.getKey().getType() == Type.TOGGLE_OFF).
169 map(e -> e.getValue()).
170 flatMap(List::stream).
171 collect(Collectors.toList());
172 //Collections.sort(onConversations, (c1, c2) -> c1.getPackets().)
174 List<PcapPacketPair> onPairs = onConversations.stream().
175 map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
176 TcpConversationUtils.extractPacketPairs(c)).
177 flatMap(List::stream). // flatten List<List<>> to List<>
178 collect(Collectors.toList());
179 List<PcapPacketPair> offPairs = offConversations.stream().
180 map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
181 TcpConversationUtils.extractPacketPairs(c)).
182 flatMap(List::stream). // flatten List<List<>> to List<>
183 collect(Collectors.toList());
184 // Note: need to update the DnsMap of all PcapPacketPairs if we want to use the IP/hostname-sensitive distance.
185 Stream.concat(Stream.of(onPairs), Stream.of(offPairs)).flatMap(List::stream).forEach(p -> p.setDnsMap(dnsMap));
186 // Perform clustering on conversation logged as part of all ON events.
187 // Calculate number of events per type (only ON/only OFF), which means half of the number of all timestamps.
188 int numberOfEventsPerType = triggerTimes.size() / 2;
189 int lowerBound = numberOfEventsPerType - (int)(numberOfEventsPerType * 0.1);
190 int upperBound = numberOfEventsPerType + (int)(numberOfEventsPerType * 0.1);
191 int minPts = lowerBound;
192 DBSCANClusterer<PcapPacketPair> onClusterer = new DBSCANClusterer<>(eps, minPts);
193 List<Cluster<PcapPacketPair>> onClusters = onClusterer.cluster(onPairs);
194 // Perform clustering on conversation logged as part of all OFF events.
195 DBSCANClusterer<PcapPacketPair> offClusterer = new DBSCANClusterer<>(eps, minPts);
196 List<Cluster<PcapPacketPair>> offClusters = offClusterer.cluster(offPairs);
197 // Sort the conversations as reference
198 List<Conversation> sortedAllConversation = TcpConversationUtils.sortConversationList(allConversations);
200 System.out.println("========================================");
201 System.out.println(" Clustering results for ON ");
202 System.out.println(" Number of clusters: " + onClusters.size());
204 List<List<List<PcapPacket>>> ppListOfListReadOn = new ArrayList<>();
205 List<List<List<PcapPacket>>> ppListOfListListOn = new ArrayList<>();
206 List<List<List<PcapPacket>>> corePointRangeSignatureOn = new ArrayList<>();
207 for (Cluster<PcapPacketPair> c : onClusters) {
208 System.out.println(String.format("<<< Cluster #%02d (%03d points) >>>", ++count, c.getPoints().size()));
209 System.out.print(PrintUtils.toSummaryString(c));
210 if(c.getPoints().size() > lowerBound && c.getPoints().size() < upperBound) {
212 List<List<PcapPacket>> ppListOfList = PcapPacketUtils.clusterToListOfPcapPackets(c);
213 // Check for overlaps and decide whether to do range-based or conservative checking
214 corePointRangeSignatureOn.add(PcapPacketUtils.extractRangeCorePoints(ppListOfList, eps, minPts));
215 ppListOfListListOn.add(ppListOfList);
218 System.out.println("========================================");
219 System.out.println(" Clustering results for OFF ");
220 System.out.println(" Number of clusters: " + offClusters.size());
222 List<List<List<PcapPacket>>> ppListOfListReadOff = new ArrayList<>();
223 List<List<List<PcapPacket>>> ppListOfListListOff = new ArrayList<>();
224 List<List<List<PcapPacket>>> corePointRangeSignatureOff = new ArrayList<>();
225 for (Cluster<PcapPacketPair> c : offClusters) {
226 System.out.println(String.format("<<< Cluster #%03d (%06d points) >>>", ++count, c.getPoints().size()));
227 System.out.print(PrintUtils.toSummaryString(c));
228 if(c.getPoints().size() > lowerBound && c.getPoints().size() < upperBound) {
230 List<List<PcapPacket>> ppListOfList = PcapPacketUtils.clusterToListOfPcapPackets(c);
231 // Check for overlaps and decide whether to do range-based or conservative checking
232 corePointRangeSignatureOff.add(PcapPacketUtils.extractRangeCorePoints(ppListOfList, eps, minPts));
233 ppListOfListListOff.add(ppListOfList);
237 // =========================================== SIGNATURE CREATION ===========================================
239 ppListOfListListOn = PcapPacketUtils.concatSequences(ppListOfListListOn, sortedAllConversation);
240 // TODO: Need to remove sequence number 0 for TP-Link plug since it is not a good signature!
241 // TODO: This sequence actually belongs to the local communication between the plug and the phone
242 // PcapPacketUtils.removeSequenceFromSignature(ppListOfListListOn, 0);
243 // Remove sequences in the list that have overlap
244 StringTokenizer stringTokenizerOn = new StringTokenizer(deletedSequencesOn, ",");
245 while(stringTokenizerOn.hasMoreTokens()) {
246 int sequenceToDelete = Integer.parseInt(stringTokenizerOn.nextToken());
247 if (sequenceToDelete == -1) { // '-1' means there is no removal
250 PcapPacketUtils.removeSequenceFromSignature(ppListOfListListOn, sequenceToDelete);
252 ppListOfListListOn = PcapPacketUtils.sortSequences(ppListOfListListOn);
255 ppListOfListListOff = PcapPacketUtils.concatSequences(ppListOfListListOff, sortedAllConversation);
256 // TODO: Need to remove sequence number 0 for TP-Link plug since it is not a good signature!
257 // TODO: This sequence actually belongs to the local communication between the plug and the phone
258 // PcapPacketUtils.removeSequenceFromSignature(ppListOfListListOff, 0);
259 // Remove sequences in the list that have overlap
260 StringTokenizer stringTokenizerOff = new StringTokenizer(deletedSequencesOff, ",");
261 while(stringTokenizerOff.hasMoreTokens()) {
262 int sequenceToDelete = Integer.parseInt(stringTokenizerOff.nextToken());
263 if (sequenceToDelete == -1) { // '-1' means there is no removal
266 PcapPacketUtils.removeSequenceFromSignature(ppListOfListListOff, sequenceToDelete);
268 ppListOfListListOff = PcapPacketUtils.sortSequences(ppListOfListListOff);
270 // Print the signatures to the screen
271 System.out.println("========================================");
272 System.out.println(" ON Signature ");
273 System.out.println("========================================");
274 PcapPacketUtils.printSignatures(ppListOfListListOn);
275 System.out.println("========================================");
276 System.out.println(" OFF Signature ");
277 System.out.println("========================================");
278 PcapPacketUtils.printSignatures(ppListOfListListOff);
279 // Printing signatures into files
280 PrintUtils.serializeIntoFile(onSignatureFile, ppListOfListListOn);
281 PrintUtils.serializeIntoFile(offSignatureFile, ppListOfListListOff);
282 // Printing cluster analyses into files
283 PrintUtils.serializeIntoFile(onClusterAnalysisFile, corePointRangeSignatureOn);
284 PrintUtils.serializeIntoFile(offClusterAnalysisFile, corePointRangeSignatureOff);
286 // =========================================== SIGNATURE DURATION ===========================================
287 List<Instant> firstSignatureTimestamps = new ArrayList<>();
288 List<Instant> lastSignatureTimestamps = new ArrayList<>();
289 if (!ppListOfListListOn.isEmpty()) {
290 List<List<PcapPacket>> firstListOnSign = ppListOfListListOn.get(0);
291 List<List<PcapPacket>> lastListOnSign = ppListOfListListOn.get(ppListOfListListOn.size() - 1);
292 // Load ON signature first and last packet's timestamps
293 for (List<PcapPacket> list : firstListOnSign) {
294 // Get timestamp Instant from the last packet
295 firstSignatureTimestamps.add(list.get(0).getTimestamp());
297 for (List<PcapPacket> list : lastListOnSign) {
298 // Get timestamp Instant from the last packet
299 int lastPacketIndex = list.size() - 1;
300 lastSignatureTimestamps.add(list.get(lastPacketIndex).getTimestamp());
304 if (!ppListOfListListOn.isEmpty()) {
305 List<List<PcapPacket>> firstListOffSign = ppListOfListListOff.get(0);
306 List<List<PcapPacket>> lastListOffSign = ppListOfListListOff.get(ppListOfListListOff.size() - 1);
307 // Load OFF signature first and last packet's timestamps
308 for (List<PcapPacket> list : firstListOffSign) {
309 // Get timestamp Instant from the last packet
310 firstSignatureTimestamps.add(list.get(0).getTimestamp());
312 for (List<PcapPacket> list : lastListOffSign) {
313 // Get timestamp Instant from the last packet
314 int lastPacketIndex = list.size() - 1;
315 lastSignatureTimestamps.add(list.get(lastPacketIndex).getTimestamp());
// Sort both timestamp lists in ascending order using Instant's own compareTo.
firstSignatureTimestamps.sort(Instant::compareTo);
lastSignatureTimestamps.sort(Instant::compareTo);
327 Iterator<Instant> iterFirst = firstSignatureTimestamps.iterator();
328 Iterator<Instant> iterLast = lastSignatureTimestamps.iterator();
330 long maxDuration = Long.MIN_VALUE;
331 System.out.println("========================================");
332 System.out.println(" Signature Durations ");
333 System.out.println("========================================");
334 while (iterFirst.hasNext() && iterLast.hasNext()) {
335 Instant firstInst = (Instant) iterFirst.next();
336 Instant lastInst = (Instant) iterLast.next();
337 Duration dur = Duration.between(firstInst, lastInst);
338 duration = dur.toMillis();
339 // Check duration --- should be below 15 seconds
340 if (duration > TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS) {
341 while (duration > TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS && iterFirst.hasNext()) {
342 // that means we have to move to the next trigger
343 firstInst = (Instant) iterFirst.next();
345 dur = Duration.between(firstInst, lastInst);
346 duration = dur.toMillis();
347 } else { // Below 0/Negative --- that means we have to move to the next signature
348 while (duration < 0 && iterLast.hasNext()) { // that means we have to move to the next trigger
349 lastInst = (Instant) iterLast.next();
351 dur = Duration.between(firstInst, lastInst);
352 duration = dur.toMillis();
354 System.out.println(duration);
355 // Update duration if this bigger than the max value and still less than the window inclusion time
356 maxDuration = maxDuration < duration && duration <= TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS ?
357 duration : maxDuration;
359 // Just assign the value 0 if there is no signature
360 if (maxDuration == Long.MIN_VALUE) {
363 System.out.println("========================================");
364 System.out.println("Max signature duration: " + maxDuration);
365 System.out.println("========================================");
367 // ==========================================================================================================