X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=parser%2Fparse_packet_frequency.py;h=4d0fea21912049ff065cb6c19ef6be20da06f3ac;hb=f860c2e4c37d3d0670af6ff037da70795bd5e960;hp=5bc1a35bd9f3e57d7eedaeeb253bb48c7582cbfc;hpb=38e85fbaaea37e40593772f1867aaae3eafb9bae;p=pingpong.git diff --git a/parser/parse_packet_frequency.py b/parser/parse_packet_frequency.py index 5bc1a35..4d0fea2 100644 --- a/parser/parse_packet_frequency.py +++ b/parser/parse_packet_frequency.py @@ -10,6 +10,8 @@ import json import numpy as np from collections import defaultdict from dateutil import parser +from datetime import datetime +from decimal import * JSON_KEY_SOURCE = "_source" JSON_KEY_LAYERS = "layers" @@ -21,10 +23,17 @@ JSON_KEY_FRAME = "frame" JSON_KEY_FRAME_TIME = "frame.time" TABLE_HEADER_X = "Timestamp (hh:mm:ss)" TABLE_HEADER_Y = "Packet frequency (pps)" +INCOMING_APPENDIX = "_incoming" +OUTGOING_APPENDIX = "_outgoing" +FILE_APPENDIX = ".dat" # Use this constant as a flag WINDOW_SIZE = 5 -USE_MOVING_AVERAGE = True +USE_MOVING_AVERAGE = False +USE_BINNING = True +# Range = 6, i.e. 3 to left and 3 to right (in seconds) +TOTAL_RANGE = 60 # TOTAL_RANGE = 2 x RANGE +RANGE = 30 def moving_average(array, window=3): @@ -50,23 +59,78 @@ def moving_average(array, window=3): retarr[window - 1:] = retarr[window - 1:] / window return retarr +def hms_to_seconds(t): + """ Calculate hms to seconds + Args: + t = time in hh:mm:ss string + Adapted from: + https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds + """ + h, m, s = [int(i) for i in t.split(':')] + return 3600*h + 60*m + s + +def seconds_to_hms(t): + """ Calculate seconds to hms + Args: + t = time in seconds + Adapted from: + https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds + """ + h = t / 3600 + m = (t - (h * 3600)) / 60 + s = t - (h * 3600) - (m * 60) + hh = str(h) + if len(hh) is 1: + hh = "0" + hh + mm = str(m) + if len(mm) is 1: + mm = "0" + mm + ss = str(s) + if len(ss) is 1: + ss = "0" + ss + return hh + ":" + mm + ":" + ss + +def include_timestamps_zero_packets(timefreq): + """ Include every second that has zero packets (no packets/transmission) + Args: + timefreq = dictionary that maps timestamps to number of packets + """ + sortedkeylist = [] + for key in sorted(timefreq): + sortedkeylist.append(key) + first = sortedkeylist[0] + last = sortedkeylist[len(sortedkeylist)-1] + # Calculate the number of seconds between first and last packets + first_seconds = hms_to_seconds(first) + last_seconds = hms_to_seconds(last) + seconds = last_seconds - first_seconds + # Start counting and filling in timestamps with zero packets + counter = 0 + while counter < seconds: + timestamp = seconds_to_hms(first_seconds + counter) + if timestamp not in timefreq: + timefreq[timestamp] = 0 + counter += 1 + return timefreq + -def save_to_file(tbl_header, dictionary, filename_out): +def save_to_file(tblheader, dictionary, filenameout): """ Show summary of statistics of PCAP file Args: - tbl_header: header for the saved table + tblheader: header for the saved table dictionary: dictionary to be saved filename_out: file name to save """ # Appending, not overwriting! - f = open(filename_out, 'a') + f = open(filenameout, 'a') # Write the table header - f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n"); + f.write("# " + tblheader + "\n") + f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n") # Write "0 0" if dictionary is empty if not dictionary: - f.write("0 0"); + f.write("0 0") f.close() - print "Writing zeroes to file: ", filename_out + print "Writing zeroes to file: ", filenameout return if USE_MOVING_AVERAGE: @@ -82,13 +146,56 @@ def save_to_file(tbl_header, dictionary, filename_out): # Space separated f.write(str(key) + " " + str(valarr[ind]) + "\n") ind += 1 + + elif USE_BINNING: + sortedlist = [] + # Iterate over dictionary and write (key, value) pairs + ind = 0 + first = 0 + last = 0 + for key in sorted(dictionary): + sortedlist.append(key) + print "Key: ", key, " - Value: ", dictionary[key], " - Ind: ", ind + ind += 1 + first = hms_to_seconds(sortedlist[0]) + #print "First: ", key + last = hms_to_seconds(sortedlist[ind-1]) + #print "Last: ", key + resultdict = dict() + # Put new binning keys + time_ind = first + ind = 0 + while time_ind < last: + # Initialize with the first key in the list + curr_key = sortedlist[ind] + curr_key_secs = hms_to_seconds(curr_key) + # Initialize with 0 first + resultdict[time_ind] = 0 + # Check if this is still within RANGE - bin the value if it is + while time_ind - RANGE <= curr_key_secs and curr_key_secs <= time_ind + RANGE: + resultdict[time_ind] += dictionary[curr_key] + print "Time index: ", seconds_to_hms(time_ind), " Value: ", resultdict[time_ind] + ind += 1 + if ind > len(dictionary)-1: + break + # Initialize with the key in the list + curr_key = sortedlist[ind] + curr_key_secs = hms_to_seconds(curr_key) + # Increment time index + time_ind += TOTAL_RANGE + # Now write to file after binning + for key in sorted(resultdict): + # Space separated + f.write(seconds_to_hms(key) + " " + str(resultdict[key]) + "\n") + #print seconds_to_hms(key) + " " + str(resultdict[key]) + else: # Iterate over dictionary and write (key, value) pairs for key in sorted(dictionary): # Space separated f.write(str(key) + " " + str(dictionary[key]) + "\n") f.close() - print "Writing output to file: ", filename_out + print "Writing output to file: ", filenameout def main(): @@ -98,9 +205,17 @@ def main(): print "Usage: python", sys.argv[0], " " return # Parse the file for the specified MAC address - time_freq = parse_json(sys.argv[1], sys.argv[4]) + timefreq_incoming = parse_json(sys.argv[1], sys.argv[4], True) + timefreq_incoming = include_timestamps_zero_packets(timefreq_incoming) + timefreq_outgoing = parse_json(sys.argv[1], sys.argv[4], False) + timefreq_outgoing = include_timestamps_zero_packets(timefreq_outgoing) # Write statistics into file - save_to_file(sys.argv[3], time_freq, sys.argv[2]) + print "=====================================================================" + print "==> Analyzing incoming traffic ..." + save_to_file(sys.argv[3] + INCOMING_APPENDIX, timefreq_incoming, sys.argv[2] + INCOMING_APPENDIX + FILE_APPENDIX) + print "=====================================================================" + print "==> Analyzing outgoing traffic ..." + save_to_file(sys.argv[3] + OUTGOING_APPENDIX, timefreq_outgoing, sys.argv[2] + OUTGOING_APPENDIX + FILE_APPENDIX) print "=====================================================================" #for time in time_freq.keys(): #for key in sorted(time_freq): @@ -109,15 +224,17 @@ def main(): # Convert JSON file containing DNS traffic to a map in which a hostname points to its set of associated IPs. -def parse_json(file_path, mac_address): +def parse_json(filepath, macaddress, incomingoutgoing): """ Show summary of statistics of PCAP file Args: - file_path: path of the read file - mac_address: MAC address of a device to analyze + filepath: path of the read file + macaddress: MAC address of a device to analyze + incomingoutgoing: boolean to define whether we collect incoming or outgoing traffic + True = incoming, False = outgoing """ # Maps timestamps to frequencies of packets - time_freq = dict() - with open(file_path) as jf: + timefreq = dict() + with open(filepath) as jf: # Read JSON. # data becomes reference to root JSON object (or in our case json array) data = json.load(jf) @@ -128,7 +245,7 @@ def parse_json(file_path, mac_address): layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] # Get timestamp frame = layers.get(JSON_KEY_FRAME, None) - date_time = frame.get(JSON_KEY_FRAME_TIME, None) + datetime = frame.get(JSON_KEY_FRAME_TIME, None) # Get into the Ethernet address part eth = layers.get(JSON_KEY_ETH, None) # Skip any non DNS traffic @@ -139,19 +256,29 @@ def parse_json(file_path, mac_address): src = eth.get(JSON_KEY_ETH_SRC, None) dst = eth.get(JSON_KEY_ETH_DST, None) # Get just the time part - date_time_obj = parser.parse(date_time) + datetimeobj = parser.parse(datetime) # Remove the microsecond part - time_str = str(date_time_obj.time())[:8] - print str(time_str) + " - src:" + str(src) + " - dest:" + str(dst) + timestr = str(datetimeobj.time())[:8] + print str(timestr) + " - src:" + str(src) + " - dest:" + str(dst) # Get and count the traffic for the specified MAC address - if src == mac_address or dst == mac_address: - # Check if timestamp already exists in the map - # If yes, then just increment the frequency value... - if time_str in time_freq: - time_freq[time_str] = time_freq[time_str] + 1 - else: # If not, then put the value one there - time_freq[time_str] = 1 - return time_freq + if incomingoutgoing: + if dst == macaddress: + # Check if timestamp already exists in the map + # If yes, then just increment the frequency value... + if timestr in timefreq: + timefreq[timestr] = timefreq[timestr] + 1 + else: # If not, then put the value one there + timefreq[timestr] = 1 + else: + if src == macaddress: + # Check if timestamp already exists in the map + # If yes, then just increment the frequency value... + if timestr in timefreq: + timefreq[timestr] = timefreq[timestr] + 1 + else: # If not, then put the value one there + timefreq[timestr] = 1 + + return timefreq if __name__ == '__main__':