1 package edu.uci.iotproject;
3 import static edu.uci.iotproject.analysis.UserAction.Type;
5 import edu.uci.iotproject.analysis.*;
6 import edu.uci.iotproject.io.PrintWriterUtils;
7 import edu.uci.iotproject.io.TriggerTimesFileReader;
8 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
9 import edu.uci.iotproject.trafficreassembly.layer3.TcpReassembler;
10 import edu.uci.iotproject.util.PcapPacketUtils;
11 import edu.uci.iotproject.util.PrintUtils;
12 import org.apache.commons.math3.stat.clustering.Cluster;
13 import org.apache.commons.math3.stat.clustering.DBSCANClusterer;
14 import org.pcap4j.core.*;
15 import org.pcap4j.packet.namednumber.DataLinkType;
18 import java.net.UnknownHostException;
19 import java.time.Duration;
20 import java.time.Instant;
22 import java.util.concurrent.TimeoutException;
23 import java.util.stream.Collectors;
24 import java.util.stream.Stream;
27 * This is a system that reads PCAP files to compare
28 * patterns of DNS hostnames, packet sequences, and packet
29 * lengths with training data to determine certain events
30 * or actions for smart home devices.
32 * @author Janus Varmarken
33 * @author Rahmadi Trimananda (rtrimana@uci.edu)
36 public class SignatureGenerator {
39 * If set to {@code true}, output written to the results file is also dumped to standard out.
41 private static boolean DUPLICATE_OUTPUT_TO_STD_OUT = true;
43 * Suffix appended to the input PCAP file name to form the log file name.
45 private static String LOG_EXTENSION = "_signature-generation.log";
47 * Directory for logging.
49 private static String LOG_DIRECTORY = "./";
51 public static void main(String[] args) throws PcapNativeException, NotOpenException, EOFException,
52 TimeoutException, UnknownHostException, IOException {
53 // -------------------------------------------------------------------------------------------------------------
54 // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------
55 if (args.length < 11) {
56 String errMsg = String.format("SPECTO version 1.0\n" +
57 "Copyright (C) 2018-2019 Janus Varmarken and Rahmadi Trimananda.\n" +
58 "University of California, Irvine.\n" +
59 "All rights reserved.\n\n" +
60 "Usage: %s inputPcapFile outputPcapFile triggerTimesFile deviceIp" +
61 " onSignatureFile offSignatureFile onClusterAnalysisFile offClusterAnalysisFile epsilon" +
62 " deletedSequencesOn deletedSequencesOff" +
63 "\n inputPcapFile: the target of the detection" +
64 "\n outputPcapFile: the processed PCAP file through 15-second window filtering" +
65 "\n triggerTimesFile: the trigger timestamps" +
66 "\n deviceIp: the IP address of the device we want to generate a signature for" +
67 "\n onSignatureFile: name of the ON signature file" +
68 "\n offSignatureFile: name of the OFF signature file" +
69 "\n onClusterAnalysisFile: name of the ON signature cluster analysis file" +
70 "\n offClusterAnalysisFile: name of the OFF signature cluster analysis file" +
71 "\n epsilon: epsilon value of the DBSCAN algorithm" +
72 "\n deletedSequencesOn: sequences to be deleted from the final ON signature" +
73 " (please separate with commas, e.g., 0,1,2, or put '-1' if not needed)" +
74 "\n deletedSequencesOff: sequences to be deleted from the final OFF signature" +
75 " (please separate with commas, e.g., 0,1,2, or put '-1' if not needed)",
76 SignatureGenerator.class.getSimpleName());
77 System.out.println(errMsg);
80 boolean verbose = true;
81 final String inputPcapFile = args[0];
82 final String outputPcapFile = args[1];
83 final String triggerTimesFile = args[2];
84 final String deviceIp = args[3];
85 final String onSignatureFile = args[4];
86 final String offSignatureFile = args[5];
87 final String onClusterAnalysisFile = args[6];
88 final String offClusterAnalysisFile = args[7];
89 final double eps = Double.parseDouble(args[8]);
90 final String deletedSequencesOn = args[9];
91 final String deletedSequencesOff = args[10];
92 final String logFile = inputPcapFile + LOG_EXTENSION;
94 // Prepare file outputter.
95 File outputFile = new File(logFile);
96 outputFile.getParentFile().mkdirs();
97 final PrintWriter resultsWriter = new PrintWriter(new FileWriter(outputFile));
99 // =========================================== TRAFFIC FILTERING ============================================
101 TriggerTimesFileReader ttfr = new TriggerTimesFileReader();
102 List<Instant> triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false);
103 // Tag each trigger with "ON" or "OFF", assuming that the first trigger is an "ON" and that they alternate.
104 List<UserAction> userActions = new ArrayList<>();
105 for (int i = 0; i < triggerTimes.size(); i++) {
106 userActions.add(new UserAction(i % 2 == 0 ? Type.TOGGLE_ON : Type.TOGGLE_OFF, triggerTimes.get(i)));
108 TriggerTrafficExtractor tte = new TriggerTrafficExtractor(inputPcapFile, triggerTimes, deviceIp);
109 final PcapDumper outputter = Pcaps.openDead(DataLinkType.EN10MB, 65536).dumpOpen(outputPcapFile);
110 DnsMap dnsMap = new DnsMap();
111 TcpReassembler tcpReassembler = new TcpReassembler();
112 TrafficLabeler trafficLabeler = new TrafficLabeler(userActions);
113 tte.performExtraction(pkt -> {
116 } catch (NotOpenException e) {
119 }, dnsMap, tcpReassembler, trafficLabeler);
123 if (tte.getPacketsIncludedCount() != trafficLabeler.getTotalPacketCount()) {
124 // Sanity/debug check
125 throw new AssertionError(String.format("mismatch between packet count in %s and %s",
126 TriggerTrafficExtractor.class.getSimpleName(), TrafficLabeler.class.getSimpleName()));
129 // Extract all conversations present in the filtered trace.
130 List<Conversation> allConversations = tcpReassembler.getTcpConversations();
131 // Group conversations by hostname.
132 Map<String, List<Conversation>> convsByHostname =
133 TcpConversationUtils.groupConversationsByHostname(allConversations, dnsMap);
134 PrintWriterUtils.println("Grouped conversations by hostname.", resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
135 // For each hostname, count the frequencies of packet lengths exchanged with that hostname.
136 final Map<String, Map<Integer, Integer>> pktLenFreqsByHostname = new HashMap<>();
137 convsByHostname.forEach((host, convs) -> pktLenFreqsByHostname.put(host,
138 TcpConversationUtils.countPacketLengthFrequencies(convs)));
139 PrintWriterUtils.println("Counted frequencies of packet lengths exchanged with each hostname.",
140 resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
141 // For each hostname, count the frequencies of packet sequences (i.e., count how many
142 // conversations exchange a sequence of packets of some specific lengths).
143 final Map<String, Map<String, Integer>> pktSeqFreqsByHostname = new HashMap<>();
144 convsByHostname.forEach((host, convs) -> pktSeqFreqsByHostname.put(host,
145 TcpConversationUtils.countPacketSequenceFrequencies(convs)));
146 PrintWriterUtils.println("Counted frequencies of packet sequences exchanged with each hostname.",
147 resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
148 // For each hostname, count frequencies of packet pairs exchanged
149 // with that hostname across all conversations
150 final Map<String, Map<String, Integer>> pktPairFreqsByHostname =
151 TcpConversationUtils.countPacketPairFrequenciesByHostname(allConversations, dnsMap);
152 PrintWriterUtils.println("Counted frequencies of packet pairs per hostname.",
153 resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
154 // For each user action, reassemble the set of TCP connections occurring shortly after that action.
155 final Map<UserAction, List<Conversation>> userActionToConversations =
156 trafficLabeler.getLabeledReassembledTcpTraffic();
157 final Map<UserAction, Map<String, List<Conversation>>> userActionsToConvsByHostname =
158 trafficLabeler.getLabeledReassembledTcpTraffic(dnsMap);
159 PrintWriterUtils.println("Reassembled TCP conversations occurring shortly after each user event.",
160 resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
163 * NOTE: no need to generate these more complex on/off maps that also contain mappings from hostname and
164 * sequence identifiers as we do not care about hostnames and sequences during clustering.
165 * We can simply use the UserAction->List<Conversation> map to generate ON/OFF groupings of conversations.
167 // Contains all ON events: hostname -> sequence identifier -> list of conversations with that sequence
168 Map<String, Map<String, List<Conversation>>> ons = new HashMap<>();
169 // Contains all OFF events: hostname -> sequence identifier -> list of conversations with that sequence
170 Map<String, Map<String, List<Conversation>>> offs = new HashMap<>();
171 userActionsToConvsByHostname.forEach((ua, hostnameToConvs) -> {
172 Map<String, Map<String, List<Conversation>>> outer = ua.getType() == Type.TOGGLE_ON ? ons : offs;
173 hostnameToConvs.forEach((host, convs) -> {
174 Map<String, List<Conversation>> seqsToConvs = TcpConversationUtils.
175 groupConversationsByPacketSequence(convs, verbose);
176 outer.merge(host, seqsToConvs, (oldMap, newMap) -> {
177 newMap.forEach((sequence, cs) -> oldMap.merge(sequence, cs, (list1, list2) -> {
186 // ============================================== PAIR CLUSTERING ============================================
187 // TODO: No need to use the more convoluted on/off maps; Can simply use the UserAction->List<Conversation> map
188 // TODO: when we don't care about hostnames and sequences (see comment earlier).
189 // ===========================================================================================================
190 List<Conversation> onConversations = userActionToConversations.entrySet().stream().
191 filter(e -> e.getKey().getType() == Type.TOGGLE_ON). // drop all OFF events from stream
192 map(e -> e.getValue()). // no longer interested in the UserActions
193 flatMap(List::stream). // flatten List<List<T>> to a List<T>
194 collect(Collectors.toList());
195 List<Conversation> offConversations = userActionToConversations.entrySet().stream().
196 filter(e -> e.getKey().getType() == Type.TOGGLE_OFF).
197 map(e -> e.getValue()).
198 flatMap(List::stream).
199 collect(Collectors.toList());
200 //Collections.sort(onConversations, (c1, c2) -> c1.getPackets().)
202 List<PcapPacketPair> onPairs = onConversations.stream().
203 map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
204 TcpConversationUtils.extractPacketPairs(c)).
205 flatMap(List::stream). // flatten List<List<>> to List<>
206 collect(Collectors.toList());
207 List<PcapPacketPair> offPairs = offConversations.stream().
208 map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
209 TcpConversationUtils.extractPacketPairs(c)).
210 flatMap(List::stream). // flatten List<List<>> to List<>
211 collect(Collectors.toList());
212 // Note: need to update the DnsMap of all PcapPacketPairs if we want to use the IP/hostname-sensitive distance.
213 Stream.concat(Stream.of(onPairs), Stream.of(offPairs)).flatMap(List::stream).forEach(p -> p.setDnsMap(dnsMap));
214 // Perform clustering on conversations logged as part of all ON events.
215 // Calculate number of events per type (only ON/only OFF), which means half of the number of all timestamps.
216 int numberOfEventsPerType = triggerTimes.size() / 2;
217 int lowerBound = numberOfEventsPerType - (int)(numberOfEventsPerType * 0.1);
218 int upperBound = numberOfEventsPerType + (int)(numberOfEventsPerType * 0.1);
219 int minPts = lowerBound;
220 DBSCANClusterer<PcapPacketPair> onClusterer = new DBSCANClusterer<>(eps, minPts);
221 List<Cluster<PcapPacketPair>> onClusters = onClusterer.cluster(onPairs);
222 // Perform clustering on conversations logged as part of all OFF events.
223 DBSCANClusterer<PcapPacketPair> offClusterer = new DBSCANClusterer<>(eps, minPts);
224 List<Cluster<PcapPacketPair>> offClusters = offClusterer.cluster(offPairs);
225 // Sort the conversations as reference
226 List<Conversation> sortedAllConversation = TcpConversationUtils.sortConversationList(allConversations);
228 PrintWriterUtils.println("========================================", resultsWriter,
229 DUPLICATE_OUTPUT_TO_STD_OUT);
230 PrintWriterUtils.println(" Clustering results for ON ", resultsWriter,
231 DUPLICATE_OUTPUT_TO_STD_OUT);
232 PrintWriterUtils.println(" Number of clusters: " + onClusters.size(), resultsWriter,
233 DUPLICATE_OUTPUT_TO_STD_OUT);
235 List<List<List<PcapPacket>>> ppListOfListReadOn = new ArrayList<>();
236 List<List<List<PcapPacket>>> ppListOfListListOn = new ArrayList<>();
237 List<List<List<PcapPacket>>> corePointRangeSignatureOn = new ArrayList<>();
238 for (Cluster<PcapPacketPair> c : onClusters) {
239 PrintWriterUtils.println(String.format("<<< Cluster #%02d (%03d points) >>>", ++count, c.getPoints().size()),
240 resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
241 PrintWriterUtils.println(PrintUtils.toSummaryString(c), resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
242 if(c.getPoints().size() > lowerBound && c.getPoints().size() < upperBound) {
244 List<List<PcapPacket>> ppListOfList = PcapPacketUtils.clusterToListOfPcapPackets(c);
245 // Check for overlaps and decide whether to do range-based or conservative checking
246 corePointRangeSignatureOn.add(PcapPacketUtils.extractRangeCorePoints(ppListOfList, eps, minPts));
247 ppListOfListListOn.add(ppListOfList);
250 PrintWriterUtils.println("========================================", resultsWriter,
251 DUPLICATE_OUTPUT_TO_STD_OUT);
252 PrintWriterUtils.println(" Clustering results for OFF ", resultsWriter,
253 DUPLICATE_OUTPUT_TO_STD_OUT);
254 PrintWriterUtils.println(" Number of clusters: " + offClusters.size(), resultsWriter,
255 DUPLICATE_OUTPUT_TO_STD_OUT);
257 List<List<List<PcapPacket>>> ppListOfListReadOff = new ArrayList<>();
258 List<List<List<PcapPacket>>> ppListOfListListOff = new ArrayList<>();
259 List<List<List<PcapPacket>>> corePointRangeSignatureOff = new ArrayList<>();
260 for (Cluster<PcapPacketPair> c : offClusters) {
261 PrintWriterUtils.println(String.format("<<< Cluster #%03d (%06d points) >>>", ++count, c.getPoints().size()),
262 resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
263 PrintWriterUtils.println(PrintUtils.toSummaryString(c), resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
264 if(c.getPoints().size() > lowerBound && c.getPoints().size() < upperBound) {
266 List<List<PcapPacket>> ppListOfList = PcapPacketUtils.clusterToListOfPcapPackets(c);
267 // Check for overlaps and decide whether to do range-based or conservative checking
268 corePointRangeSignatureOff.add(PcapPacketUtils.extractRangeCorePoints(ppListOfList, eps, minPts));
269 ppListOfListListOff.add(ppListOfList);
273 // =========================================== SIGNATURE CREATION ===========================================
275 ppListOfListListOn = PcapPacketUtils.concatSequences(ppListOfListListOn, sortedAllConversation);
276 // Remove sequences in the list that have overlap
277 StringTokenizer stringTokenizerOn = new StringTokenizer(deletedSequencesOn, ",");
278 while(stringTokenizerOn.hasMoreTokens()) {
279 int sequenceToDelete = Integer.parseInt(stringTokenizerOn.nextToken());
280 if (sequenceToDelete == -1) { // '-1' means there is no removal
283 PcapPacketUtils.removeSequenceFromSignature(ppListOfListListOn, sequenceToDelete);
285 PrintWriterUtils.println("ON Sequences: ", resultsWriter,
286 DUPLICATE_OUTPUT_TO_STD_OUT);
287 for(List<List<PcapPacket>> listOfList : ppListOfListListOn) {
288 PrintWriterUtils.println(listOfList.get(0).get(0).length() + "...", resultsWriter,
289 DUPLICATE_OUTPUT_TO_STD_OUT);
291 ppListOfListListOn = PcapPacketUtils.sortSequences(ppListOfListListOn);
292 PrintWriterUtils.println("Concatenated and sorted ON signature sequences...", resultsWriter,
293 DUPLICATE_OUTPUT_TO_STD_OUT);
296 ppListOfListListOff = PcapPacketUtils.concatSequences(ppListOfListListOff, sortedAllConversation);
297 // Remove sequences in the list that have overlap
298 StringTokenizer stringTokenizerOff = new StringTokenizer(deletedSequencesOff, ",");
299 while(stringTokenizerOff.hasMoreTokens()) {
300 int sequenceToDelete = Integer.parseInt(stringTokenizerOff.nextToken());
301 if (sequenceToDelete == -1) { // '-1' means there is no removal
304 PcapPacketUtils.removeSequenceFromSignature(ppListOfListListOff, sequenceToDelete);
306 PrintWriterUtils.println("OFF Sequences: ", resultsWriter,
307 DUPLICATE_OUTPUT_TO_STD_OUT);
308 for(List<List<PcapPacket>> listOfList : ppListOfListListOff) {
309 PrintWriterUtils.println(listOfList.get(0).get(0).length() + "...", resultsWriter,
310 DUPLICATE_OUTPUT_TO_STD_OUT);
312 ppListOfListListOff = PcapPacketUtils.sortSequences(ppListOfListListOff);
313 PrintWriterUtils.println("Concatenated and sorted OFF signature sequences...", resultsWriter,
314 DUPLICATE_OUTPUT_TO_STD_OUT);
316 // Write the signatures to the results file (and to standard out if output duplication is enabled)
317 PrintWriterUtils.println("========================================", resultsWriter,
318 DUPLICATE_OUTPUT_TO_STD_OUT);
319 PrintWriterUtils.println(" ON Signature ", resultsWriter,
320 DUPLICATE_OUTPUT_TO_STD_OUT);
321 PrintWriterUtils.println("========================================", resultsWriter,
322 DUPLICATE_OUTPUT_TO_STD_OUT);
323 PcapPacketUtils.printSignatures(ppListOfListListOn, resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
324 PrintWriterUtils.println("========================================", resultsWriter,
325 DUPLICATE_OUTPUT_TO_STD_OUT);
326 PrintWriterUtils.println(" OFF Signature ", resultsWriter,
327 DUPLICATE_OUTPUT_TO_STD_OUT);
328 PrintWriterUtils.println("========================================", resultsWriter,
329 DUPLICATE_OUTPUT_TO_STD_OUT);
330 PcapPacketUtils.printSignatures(ppListOfListListOff, resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
331 // Clean signatures from null elements
332 PcapPacketUtils.cleanSignature(ppListOfListListOn);
333 PcapPacketUtils.cleanSignature(ppListOfListListOff);
334 // Printing signatures into files
335 PrintUtils.serializeIntoFile(onSignatureFile, ppListOfListListOn);
336 PrintUtils.serializeIntoFile(offSignatureFile, ppListOfListListOff);
337 // Printing cluster analyses into files
338 PrintUtils.serializeIntoFile(onClusterAnalysisFile, corePointRangeSignatureOn);
339 PrintUtils.serializeIntoFile(offClusterAnalysisFile, corePointRangeSignatureOff);
341 // =========================================== SIGNATURE DURATIONS =============================================
342 List<Instant> firstSignatureTimestamps = new ArrayList<>();
343 List<Instant> lastSignatureTimestamps = new ArrayList<>();
344 if (!ppListOfListListOn.isEmpty()) {
345 List<List<PcapPacket>> firstListOnSign = ppListOfListListOn.get(0);
346 List<List<PcapPacket>> lastListOnSign = ppListOfListListOn.get(ppListOfListListOn.size() - 1);
347 // Load ON signature first and last packet's timestamps
348 for (List<PcapPacket> list : firstListOnSign) {
349 // Get timestamp Instant from the first packet
350 firstSignatureTimestamps.add(list.get(0).getTimestamp());
352 for (List<PcapPacket> list : lastListOnSign) {
353 // Get timestamp Instant from the last packet
354 int lastPacketIndex = list.size() - 1;
355 lastSignatureTimestamps.add(list.get(lastPacketIndex).getTimestamp());
359 if (!ppListOfListListOff.isEmpty()) {
360 List<List<PcapPacket>> firstListOffSign = ppListOfListListOff.get(0);
361 List<List<PcapPacket>> lastListOffSign = ppListOfListListOff.get(ppListOfListListOff.size() - 1);
362 // Load OFF signature first and last packet's timestamps
363 for (List<PcapPacket> list : firstListOffSign) {
364 // Get timestamp Instant from the first packet
365 firstSignatureTimestamps.add(list.get(0).getTimestamp());
367 for (List<PcapPacket> list : lastListOffSign) {
368 // Get timestamp Instant from the last packet
369 int lastPacketIndex = list.size() - 1;
370 lastSignatureTimestamps.add(list.get(lastPacketIndex).getTimestamp());
373 // Sort the timestamps
374 firstSignatureTimestamps.sort(Comparator.comparing(Instant::toEpochMilli));
375 lastSignatureTimestamps.sort(Comparator.comparing(Instant::toEpochMilli));
377 Iterator<Instant> iterFirst = firstSignatureTimestamps.iterator();
378 Iterator<Instant> iterLast = lastSignatureTimestamps.iterator();
380 long maxDuration = Long.MIN_VALUE;
381 PrintWriterUtils.println("========================================", resultsWriter,
382 DUPLICATE_OUTPUT_TO_STD_OUT);
383 PrintWriterUtils.println(" Signature Durations ", resultsWriter,
384 DUPLICATE_OUTPUT_TO_STD_OUT);
385 PrintWriterUtils.println("========================================", resultsWriter,
386 DUPLICATE_OUTPUT_TO_STD_OUT);
387 while (iterFirst.hasNext() && iterLast.hasNext()) {
388 Instant firstInst = iterFirst.next();
389 Instant lastInst = iterLast.next();
390 Duration dur = Duration.between(firstInst, lastInst);
391 duration = dur.toMillis();
392 // Check duration --- should be below 15 seconds
393 if (duration > TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS) {
394 while (duration > TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS && iterFirst.hasNext()) {
395 // that means we have to move to the next trigger
396 firstInst = iterFirst.next();
397 dur = Duration.between(firstInst, lastInst);
398 duration = dur.toMillis();
400 } else { // Below 0/Negative --- that means we have to move to the next signature
401 while (duration < 0 && iterLast.hasNext()) {
402 // that means we have to move to the next last-packet timestamp
403 lastInst = iterLast.next();
404 dur = Duration.between(firstInst, lastInst);
405 duration = dur.toMillis();
408 PrintWriterUtils.println(duration, resultsWriter, DUPLICATE_OUTPUT_TO_STD_OUT);
409 // Update the max duration if this one is bigger than the current max and still within the inclusion window
410 maxDuration = maxDuration < duration && duration <= TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS ?
411 duration : maxDuration;
413 // Just assign the value 0 if there is no signature
414 if (maxDuration == Long.MIN_VALUE) {
417 PrintWriterUtils.println("========================================", resultsWriter,
418 DUPLICATE_OUTPUT_TO_STD_OUT);
419 PrintWriterUtils.println("Max signature duration: " + maxDuration, resultsWriter,
420 DUPLICATE_OUTPUT_TO_STD_OUT);
421 PrintWriterUtils.println("========================================", resultsWriter,
422 DUPLICATE_OUTPUT_TO_STD_OUT);
423 resultsWriter.flush();
424 resultsWriter.close();
425 // ==========================================================================================================