Script that takes a file (output by wireshark/tshark, in JSON format) and analyzes
the traffic frequency of a certain device at a certain time.
import json
import sys
from collections import defaultdict

import numpy as np
from dateutil import parser
# Keys used to walk one packet record of the tshark JSON export:
# record[JSON_KEY_SOURCE][JSON_KEY_LAYERS] holds the per-protocol layers.
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"
# NOTE(review): JSON_KEY_ETH is read by parse_json() but was not defined among
# these constants. "eth" is presumably the layer that holds the "eth.src" /
# "eth.dst" fields in tshark's JSON output -- confirm against a sample capture.
JSON_KEY_ETH = "eth"
JSON_KEY_ETH_DST = "eth.dst"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_FRAME = "frame"
JSON_KEY_FRAME_TIME = "frame.time"
# Column titles written on the header line of the output table
TABLE_HEADER_X = "Timestamp (hh:mm:ss)"
TABLE_HEADER_Y = "Packet frequency (pps)"

# NOTE(review): WINDOW_SIZE is passed to moving_average() by save_to_file()
# but was not defined among these constants. 3 mirrors the default window of
# moving_average() -- confirm the intended smoothing window.
WINDOW_SIZE = 3
# Use this constant as a flag: when True, save_to_file() writes the moving
# average of the packet counts instead of the raw counts
USE_MOVING_AVERAGE = True
def moving_average(array, window=3):
    """ Calculate moving average

        Args:
            array: array of numbers
            window: window of moving average (default = 3)

        Returns:
            A float numpy array with one entry per input element. The first
            (window - 1) entries are zero padding; every later entry is the
            mean of the `window` most recent input values. An empty input
            yields an empty array.

        Raises:
            ValueError: if window is smaller than 1.

        Adapted from:
            https://stackoverflow.com/questions/14313510/how-to-calculate-moving-average-using-numpy
    """
    # A window below 1 has no meaning (and would divide by zero); fail
    # with a clear message instead of an obscure broadcasting error
    if window < 1:
        raise ValueError("window must be >= 1, got " + str(window))
    # Calculate cumulative sum of each array element
    retarr = np.cumsum(array, dtype=float)
    # Nothing to average: return the empty float array as-is
    if retarr.size == 0:
        return retarr
    # Check if window > len(array)
    # NOTE(review): the statement guarded by this check was not visible in
    # the reviewed source; the window is clamped to the array length so the
    # slice assignments below stay consistent -- confirm intended behavior.
    if window > len(array):
        window = len(array)
    # Adjust cumulative sum of each array element
    # based on window size
    retarr[window:] = retarr[window:] - retarr[:-window]
    # Pad the first array elements with zeroes
    retarr[:window - 1] = 0.0
    # Calculate moving average starting from the element
    # at window size, e.g. element 4 for window=5
    retarr[window - 1:] = retarr[window - 1:] / window
    return retarr
def save_to_file(tbl_header, dictionary, filename_out):
    """ Append a "<timestamp> <packet frequency>" table to a text file

        Args:
            tbl_header: header for the saved table (currently unused: the
                        header line is always built from TABLE_HEADER_X and
                        TABLE_HEADER_Y)
            dictionary: dictionary to be saved; one row is written per key,
                        in sorted key order
            filename_out: file name to save

        The file is opened in append mode, so repeated calls add further
        tables to the same file. When USE_MOVING_AVERAGE is set, the values
        are smoothed with moving_average() over WINDOW_SIZE entries before
        being written; otherwise the raw values are written.
    """
    # Appending, not overwriting! The with-block guarantees the file is
    # closed even if a write fails part-way through.
    with open(filename_out, 'a') as f:
        # Write the table header
        f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n")
        # Write "0 0" if dictionary is empty
        # NOTE(review): the original statements of this branch were not
        # visible in the reviewed source; the "0 0" row and the early return
        # are reconstructed from the comment above -- confirm.
        if not dictionary:
            f.write("0 0\n")
            print("%s %s" % ("Writing zeroes to file: ", filename_out))
            return
        keys = sorted(dictionary)
        values = [dictionary[key] for key in keys]
        if USE_MOVING_AVERAGE:
            # Use moving average if this flag is true
            values = moving_average(values, WINDOW_SIZE)
        # Iterate over dictionary and write (key, value) pairs
        for key, value in zip(keys, values):
            f.write(str(key) + " " + str(value) + "\n")
    print("%s %s" % ("Writing output to file: ", filename_out))
def main():
    """ Command-line entry point

        Reads <input_file> (sys.argv[1]), counts the packets of
        <mac_address> (sys.argv[4]) with parse_json() and appends the
        resulting table to <output_file> (sys.argv[2]) with save_to_file().
        <device_name> (sys.argv[3]) is handed to save_to_file() as the
        table header.
    """
    # NOTE(review): the function header and the argument-count check were not
    # visible in the reviewed source; they are reconstructed from the usage
    # message and from the use of sys.argv[4] below -- confirm.
    if len(sys.argv) < 5:
        print(" ".join(["Usage: python", sys.argv[0],
                        "<input_file> <output_file> <device_name> <mac_address>"]))
        return
    # Parse the file for the specified MAC address
    time_freq = parse_json(sys.argv[1], sys.argv[4])
    # Write statistics into file
    save_to_file(sys.argv[3], time_freq, sys.argv[2])
    print("=====================================================================")
# Convert a JSON file of captured packets into a map in which each timestamp (hh:mm:ss) points to the number of packets sent or received by the given MAC address.
def parse_json(file_path, mac_address):
    """ Count, per second, the packets sent or received by a MAC address

        Args:
            file_path: path of the read file
            mac_address: MAC address of a device to analyze

        Returns:
            A dictionary that maps a time-of-day string "hh:mm:ss" to the
            number of packets in that second whose Ethernet source or
            destination equals mac_address.

        Packets without a frame timestamp or without an Ethernet layer are
        reported with a warning and skipped.
    """
    # Maps timestamps to frequencies of packets
    time_freq = {}
    with open(file_path) as jf:
        # data becomes reference to root JSON object (or in our case json array)
        # NOTE(review): the loading statement was not visible in the reviewed
        # source; json.load() is reconstructed from the comment above.
        data = json.load(jf)
    # Loop through json objects in data
    # Each entry is a pcap entry (request/response (packet) and associated metadata)
    for p in data:
        # p is a JSON object, not an index
        layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
        # Get the capture time; guard against a missing frame layer, which
        # previously failed with AttributeError on None
        frame = layers.get(JSON_KEY_FRAME)
        date_time = None if frame is None else frame.get(JSON_KEY_FRAME_TIME)
        if date_time is None:
            print("[ WARNING: Packet has no timestamp! ]")
            continue
        # Get into the Ethernet address part
        eth = layers.get(JSON_KEY_ETH)
        # Skip any packet without an Ethernet layer
        if eth is None:
            print("[ WARNING: Packet has no ethernet address! ]")
            continue
        # Get source and destination MAC addresses
        src = eth.get(JSON_KEY_ETH_SRC)
        dst = eth.get(JSON_KEY_ETH_DST)
        # Get just the time part
        date_time_obj = parser.parse(date_time)
        # Remove the microsecond part
        time_str = str(date_time_obj.time())[:8]
        print(str(time_str) + " - src:" + str(src) + " - dest:" + str(dst))
        # Get and count the traffic for the specified MAC address
        if src == mac_address or dst == mac_address:
            # Start at zero for a new timestamp, then increment
            time_freq[time_str] = time_freq.get(time_str, 0) + 1
    return time_freq
157 if __name__ == '__main__':