From: Janus Varmarken Date: Sat, 4 Nov 2017 02:08:58 +0000 (-0700) Subject: Update parse_dns.py with new datastructure that captures all dns requests per device... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f3b9bcd6ed5b269ac1c26d98424d2bb13a89ccb0;p=pingpong.git Update parse_dns.py with new datastructure that captures all dns requests per device [NOTE: needs testing!] --- diff --git a/parse_dns.py b/parse_dns.py index 3bb7d0b..e81fd95 100644 --- a/parse_dns.py +++ b/parse_dns.py @@ -9,6 +9,9 @@ IP addresses that is associated with that hostname. import sys import json from collections import defaultdict +from decimal import * + +ROUTER_MAC = "b0:b9:8a:73:69:8e" JSON_KEY_SOURCE = "_source" JSON_KEY_LAYERS = "layers" @@ -19,32 +22,40 @@ JSON_KEY_DNS_RESP_TYPE = "dns.resp.type" JSON_KEY_DNS_A = "dns.a" # Key for retrieving IP. 'a' for type A DNS record. JSON_KEY_DNS_RESP_NAME = "dns.resp.name" JSON_KEY_DNS_CNAME = "dns.cname" +JSON_KEY_ETH = "eth" +JSON_KEY_ETH_DST = "eth.dst" +JSON_KEY_FRAME = "frame" +JSON_KEY_FRAME_TIME_EPOCH = "frame.time_epoch" def main(): if len(sys.argv) < 2: print "Usage: python", sys.argv[0], "input_file" return - maps_tuple = parse_json_dns(sys.argv[1]) + mac_to_ddm = parse_json_dns(sys.argv[1]) + for mac in mac_to_ddm: + ddm = mac_to_ddm[mac] + ddm.print_mappings() + # maps_tuple = parse_json_dns(sys.argv[1]) - # print hostname to ip map - hn_ip_map = maps_tuple[0] - for hn in hn_ip_map.keys(): - print "=====================================================================" - print hn, "maps to:" - for ip in hn_ip_map[hn]: - print " -", ip - print "=====================================================================" + # # print hostname to ip map + # hn_ip_map = maps_tuple[0] + # for hn in hn_ip_map.keys(): + # print "=====================================================================" + # print hn, "maps to:" + # for ip in hn_ip_map[hn]: + # print " -", ip + # print 
"=====================================================================" - print " " + # print " " - # print ip to hostname map - ip_hn_map = maps_tuple[1] - for ip in ip_hn_map.keys(): - print "=====================================================================" - print ip, "maps to:" - for hn in ip_hn_map[ip]: - print " -", hn - print "=====================================================================" + # # print ip to hostname map + # ip_hn_map = maps_tuple[1] + # for ip in ip_hn_map.keys(): + # print "=====================================================================" + # print ip, "maps to:" + # for hn in ip_hn_map[ip]: + # print " -", hn + # print "=====================================================================" class DeviceDNSMap: def __init__(self, mac_address): @@ -74,26 +85,32 @@ class DeviceDNSMap: return best_fit def add_mapping(self, ip, timestamp_hostname_tuple): - self.ip_mappings[ip].add(timestamp_hostname_tuple) + self.ip_mappings[ip].append(timestamp_hostname_tuple) + + def print_mappings(self): + count = 0 + print "### Mappings for MAC = ", self.mac, "###" + for ip in self.ip_mappings: + print "--- IP ", ip, " maps to: ---" + for t in self.ip_mappings[ip]: + print t[1], "at epoch time =", t[0] + count += 1 + print "### Total of", count, "mappings for", self.mac, "###" # -------------------------------------------------------------------------- # Define eq and hash such that instances of the class can be used as keys in dictionaries. # Equality is based on MAC as a MAC uniquely identifies the device. def __eq__(self, another): - return hasattr(another, 'mac') and self.mac == another.mac + return hasattr(another, 'mac') and self.mac == another.mac def __hash__(self): return hash(self.data) # -------------------------------------------------------------------------- -# Convert JSON file containing DNS traffic to a tuple with two maps. -# Index 0 of the tuple is a map in which a hostname points to its set of associated IPs. 
-# Index 1 of the tuple is a map in which an ip points to its set of associated hostnames. def parse_json_dns(file_path): - # Maps hostnames to IPs - host_ip_mappings = defaultdict(set) - # Maps ips to hostnames - ip_host_mappings = defaultdict(set) + # Our end output: dictionary of MAC addresses with DeviceDNSMaps as values. + # Each DeviceDNSMap contains DNS lookups performed by the device with the corresponding MAC. + result = defaultdict() with open(file_path) as jf: # Read JSON. # data becomes reference to root JSON object (or in our case json array) @@ -118,6 +135,20 @@ def parse_json_dns(file_path): if len(queries.keys()) > 1: # Unclear if script will behave correctly for DNS lookups with multiple queries print "[ WARNING: Multi query DNS lookup ]" + # Get ethernet information for identifying the device performing the DNS lookup. + eth = layers.get(JSON_KEY_ETH, None) + if eth is None: + print "[ WARNING: eth data not found ]" + continue + # As this is a response to a DNS query, the IoT device is the destination. + # Get the device MAC of that device. + device_mac = eth.get(JSON_KEY_ETH_DST, None) + if device_mac is None: + print "[ WARNING: eth.dst data not found ]" + continue + # Get the router's timestamp for this packet + # so that we can mark when the DNS mapping occurred + timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH]) for ak in answers.keys(): a = answers[ak] # We are looking for type A records as these are the ones that contain the IP. @@ -128,11 +159,19 @@ def parse_json_dns(file_path): # The answer may be the canonical name. # Now trace back the answer stack, looking for any higher level aliases. 
hostname = find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME]) - # Add mapping of hostname to ip to our data structure - host_ip_mappings[hostname].add(ip) - # Add mapping of ip to hostname to our data structure - ip_host_mappings[ip].add(hostname) - return (host_ip_mappings, ip_host_mappings) + # Create the tuple that indicates WHEN the ip to hostname mapping occurred + timestamp_hostname_tuple = (timestamp,hostname) + if device_mac in result: + # If we already have DNS data for the device with this MAC: + # Add the mapping to the DeviceDNSMap that is already present in the dict. + result[device_mac].add_mapping(ip, timestamp_hostname_tuple) + else: + # No DNS data for this device yet: + # Create a new DeviceDNSMap, add the mapping, and add it to the dict. + ddm = DeviceDNSMap(device_mac) + ddm.add_mapping(ip, timestamp_hostname_tuple) + result[device_mac] = ddm + return result # Recursively traverse set of answers trying to find the top most alias for a canonical name def find_alias_hostname(answers, hostname):