"""
Script that takes a file (output by wireshark/tshark, in JSON format) and analyzes
the traffic frequency of a certain device at a certain time.
"""
import json
import sys
from collections import defaultdict
from datetime import datetime

import numpy as np
from dateutil import parser
# Keys used to walk one packet object of the wireshark/tshark JSON export
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"
# NOTE(review): JSON_KEY_ETH is referenced by parse_json() but its definition
# is not visible in the reviewed excerpt; "eth" is presumably the layer that
# holds the "eth.src" / "eth.dst" fields -- confirm against the capture format.
JSON_KEY_ETH = "eth"
JSON_KEY_ETH_DST = "eth.dst"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_FRAME = "frame"
JSON_KEY_FRAME_TIME = "frame.time"
JSON_KEY_FRAME_LEN = "frame.len"

# Column headers and file-name pieces of the generated data files
TABLE_HEADER_X = "Timestamp (hh:mm:ss)"
TABLE_HEADER_Y = "Packet frequency"
INCOMING_APPENDIX = "_incoming"
OUTGOING_APPENDIX = "_outgoing"
FILE_APPENDIX = ".dat"

# Use this constant as a flag: smooth the values with moving_average()
# before they are written
USE_MOVING_AVERAGE = False
# NOTE(review): WINDOW_SIZE is passed to moving_average() by save_to_file()
# but its definition is not visible in the reviewed excerpt; 3 mirrors the
# default window of moving_average() -- confirm the intended value.
WINDOW_SIZE = 3
# NOTE(review): presumably the flag that selects the binning branch of
# save_to_file(); neither its definition nor its value is visible in the
# reviewed excerpt -- confirm.
USE_BINNING = True

# Each bin covers TOTAL_RANGE seconds: RANGE seconds to the left and RANGE
# seconds to the right of the bin timestamp (e.g. TOTAL_RANGE = 6 means
# 3 to the left and 3 to the right). A value of 60 was used previously.
TOTAL_RANGE = 20
# Derived so that TOTAL_RANGE = 2 x RANGE always holds
RANGE = TOTAL_RANGE // 2
def moving_average(array, window=3):
    """ Calculate moving average

        Args:
            array: array of numbers
            window: window of moving average (default = 3)
        Returns:
            numpy float array as long as array; the first window - 1
            elements are zero padding, every later element is the mean of
            the window elements that end at that position
        Raises:
            ValueError: if window is smaller than 1
        Adapted from:
            https://stackoverflow.com/questions/14313510/how-to-calculate-moving-average-using-numpy
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    # Check if window > len(array)
    # NOTE(review): the statement guarded by this check is not visible in the
    # reviewed excerpt; clamping the window to the array length is assumed
    # here -- confirm against the original behaviour.
    window = min(window, len(array))
    # Calculate cumulative sum of each array element
    retarr = np.cumsum(array, dtype=float)
    if window == 0:
        # Empty input: nothing to average
        return retarr
    # Adjust cumulative sum of each array element based on window size, so
    # that every element holds the sum of the last `window` inputs
    retarr[window:] = retarr[window:] - retarr[:-window]
    # Pad the first array elements with zeroes
    retarr[:window - 1] = 0.0
    # Calculate moving average starting from the element
    # at window size, e.g. element 4 for window=5
    retarr[window - 1:] = retarr[window - 1:] / window
    return retarr
def hms_to_seconds(t):
    """ Calculate hms to seconds

        Args:
            t = time in hh:mm:ss string
        Returns:
            number of seconds as an integer
        Raises:
            ValueError: if t does not consist of exactly three
                        colon-separated integers
        Adapted from:
            https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds
    """
    hours, minutes, seconds = [int(part) for part in t.split(':')]
    return 3600 * hours + 60 * minutes + seconds
def seconds_to_hms(t):
    """ Calculate seconds to hms

        Args:
            t = time in seconds
        Returns:
            time as a zero-padded hh:mm:ss string
        Adapted from:
            https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds
    """
    # divmod() keeps every field an integer; plain "/" would yield floats
    # under Python 3 and break the string formatting
    minutes, seconds = divmod(int(t), 60)
    hours, minutes = divmod(minutes, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)
def include_timestamps_zero_packets(timelen):
    """ Include every second that has zero packets (no packets/transmission)

        Args:
            timelen = dictionary that maps timestamps to packet length
        Returns:
            the same dictionary, updated in place: every second between the
            first and the last timestamp that had no entry now maps to 0
    """
    # Nothing to fill in when there is no first/last timestamp
    if not timelen:
        return timelen
    # Calculate the number of seconds between first and last packets
    # (zero-padded hh:mm:ss strings order the same way as the times)
    first_seconds = hms_to_seconds(min(timelen))
    last_seconds = hms_to_seconds(max(timelen))
    # Start counting and filling in timestamps with zero packets
    for current in range(first_seconds, last_seconds):
        timestamp = seconds_to_hms(current)
        if timestamp not in timelen:
            timelen[timestamp] = 0
    return timelen
def save_to_file(tblheader, dictionary, filenameout):
    """ Append a table of (timestamp, value) rows to a data file

        Args:
            tblheader: header for the saved table
            dictionary: dictionary to be saved (hh:mm:ss timestamp -> value)
            filenameout: file name to save

        The rows are written as they are, smoothed with moving_average()
        (USE_MOVING_AVERAGE) or summed into bins of TOTAL_RANGE seconds.
        NOTE(review): the line that selects the binning branch is not visible
        in the reviewed excerpt; USE_BINNING is assumed -- confirm.
    """
    # Appending, not overwriting! The context manager closes the file on
    # every exit path, including the early return below.
    with open(filenameout, 'a') as f:
        # Write the table header
        f.write("# " + tblheader + "\n")
        f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n")
        # Write "0 0" if dictionary is empty
        if not dictionary:
            f.write("0 0\n")
            print("Writing zeroes to file:  %s" % filenameout)
            return

        sortedlist = sorted(dictionary)
        if USE_MOVING_AVERAGE:
            # Use moving average if this flag is true
            sortedarr = [dictionary[key] for key in sortedlist]
            valarr = moving_average(sortedarr, WINDOW_SIZE)
            # Write (key, smoothed value) pairs, space separated
            for key, value in zip(sortedlist, valarr):
                f.write(str(key) + " " + str(value) + "\n")

        elif USE_BINNING:
            for ind, key in enumerate(sortedlist):
                print("Key:  %s  - Value:  %s  - Ind:  %s"
                      % (key, dictionary[key], ind))
            first = hms_to_seconds(sortedlist[0])
            last = hms_to_seconds(sortedlist[-1])
            # Put new binning keys (seconds -> sum of the values in the bin)
            # NOTE(review): bins stop as soon as time_ind reaches the last
            # timestamp, so values later than RANGE seconds after the final
            # bin key (and a dictionary with a single timestamp) are not
            # written -- confirm this is intended.
            resultdict = {}
            time_ind = first
            ind = 0
            # The index guard keeps the loop from reading past the key list
            # should TOTAL_RANGE and RANGE ever get out of step
            while time_ind < last and ind < len(sortedlist):
                # Initialize with 0 first
                resultdict[time_ind] = 0
                while ind < len(sortedlist):
                    curr_key = sortedlist[ind]
                    curr_key_secs = hms_to_seconds(curr_key)
                    # Check if this is still within RANGE - bin the value
                    # if it is, otherwise move on to the next bin
                    if not (time_ind - RANGE <= curr_key_secs
                            <= time_ind + RANGE):
                        break
                    resultdict[time_ind] += dictionary[curr_key]
                    print("Time index:  %s  Value:  %s"
                          % (seconds_to_hms(time_ind), resultdict[time_ind]))
                    ind += 1
                # Increment time index
                time_ind += TOTAL_RANGE
            # Now write to file after binning, space separated
            for key in sorted(resultdict):
                f.write(seconds_to_hms(key) + " " + str(resultdict[key]) + "\n")

        else:
            # Iterate over dictionary and write (key, value) pairs,
            # space separated
            for key in sortedlist:
                f.write(str(key) + " " + str(dictionary[key]) + "\n")
    print("Writing output to file:  %s" % filenameout)
def main():
    """ Analyze the capture file given on the command line

        Command line arguments:
            <input_file> <output_file> <device_name> <mac_address>

        The incoming and the outgoing traffic of the MAC address are parsed
        separately and each one is saved to its own output file.
    """
    if len(sys.argv) < 5:
        print("Usage: python %s <input_file> <output_file> <device_name> <mac_address>"
              % sys.argv[0])
        return
    inputfile, outputfile, devicename, macaddress = sys.argv[1:5]
    separator = "====================================================================="
    # (collect incoming?, name appendix, message if found, message if empty)
    directions = (
        (True, INCOMING_APPENDIX,
         "==> Printing incoming traffic ...",
         "No incoming traffic to this MAC address!"),
        (False, OUTGOING_APPENDIX,
         "==> Printing outgoing traffic ...",
         "No outgoing traffic from this MAC address!"),
    )
    # Parse the file for the specified MAC address
    print(separator)
    print("Analyzing file:  %s" % inputfile)
    for incoming, appendix, found_msg, empty_msg in directions:
        timelen = parse_json(inputfile, macaddress, incoming)
        if timelen:
            timelen = include_timestamps_zero_packets(timelen)
            print(found_msg)
            save_to_file(devicename + appendix, timelen,
                         outputfile + appendix + FILE_APPENDIX)
        else:
            print(empty_msg)
        print(separator)
# Convert a JSON capture file to a map in which a timestamp (hh:mm:ss) points
# to the summed length of the frames of one MAC address in that second.
def parse_json(filepath, macaddress, incomingoutgoing):
    """ Sum the frame lengths per second for one device

        Args:
            filepath: path of the read file
            macaddress: MAC address of a device to analyze
            incomingoutgoing: boolean to define whether we collect incoming or outgoing traffic
                              True = incoming, False = outgoing
        Returns:
            dictionary that maps hh:mm:ss timestamps to the sum of the
            frame.len values of the matching packets in that second
    """
    # Maps timestamps to lengths of packets
    timelen = dict()
    with open(filepath) as jf:
        # data becomes reference to root JSON object (or in our case json array)
        # NOTE(review): the load statement is not visible in the reviewed
        # excerpt; json.load() is assumed -- confirm.
        data = json.load(jf)
    # Incoming traffic is matched on the destination address, outgoing
    # traffic on the source address
    eth_key = JSON_KEY_ETH_DST if incomingoutgoing else JSON_KEY_ETH_SRC
    # Loop through json objects in data
    # Each entry is a pcap entry (request/response (packet) and associated metadata)
    for p in data:
        # p is a JSON object, not an index
        layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
        # Get into the Ethernet address part
        eth = layers.get(JSON_KEY_ETH)
        # Skip any packet without an Ethernet layer
        if eth is None:
            print("[ WARNING: Packet has no ethernet address! ]")
            continue
        # Only count the traffic for the specified MAC address
        if eth.get(eth_key) != macaddress:
            continue
        # Get timestamp and frame length
        frame = layers.get(JSON_KEY_FRAME) or {}
        frame_time = frame.get(JSON_KEY_FRAME_TIME)
        length = frame.get(JSON_KEY_FRAME_LEN)
        if frame_time is None or length is None:
            # Without both values the packet cannot be accounted for
            print("[ WARNING: Packet has no frame time/length! ]")
            continue
        # Get just the time part, without the microsecond part
        timestr = parser.parse(frame_time).strftime("%H:%M:%S")
        # Add to the value of this timestamp, starting from 0 if it is new
        timelen[timestr] = timelen.get(timestr, 0) + int(length)
    return timelen
# Run the analysis only when this file is executed as a script
# NOTE(review): the guarded statement is not visible in the reviewed excerpt;
# a call to main() is assumed -- confirm.
if __name__ == '__main__':
    main()