From 82489cd1792874402858ded7dc0054032b0ca7a7 Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Wed, 1 Nov 2017 13:46:37 -0700 Subject: [PATCH] Update DNS parser to also construct IP-> hostname map [NOTE: This is the SIMPLEST SOLUTION and does not consider cases where one IP maps to multiple hostnames nor does it consider timing of the mappings -- serves only as a starting point] --- parse_dns.py | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/parse_dns.py b/parse_dns.py index 4671a64..51d1d32 100644 --- a/parse_dns.py +++ b/parse_dns.py @@ -24,18 +24,36 @@ def main(): if len(sys.argv) < 2: print "Usage: python", sys.argv[0], "input_file" return - hn_ip_map = parse_json(sys.argv[1]) + maps_tuple = parse_json(sys.argv[1]) + + # print hostname to ip map + hn_ip_map = maps_tuple[0] for hn in hn_ip_map.keys(): print "=====================================================================" print hn, "maps to:" for ip in hn_ip_map[hn]: print " -", ip print "=====================================================================" + + print " " + + # print ip to hostname map + ip_hn_map = maps_tuple[1] + for ip in ip_hn_map.keys(): + print "=====================================================================" + print ip, "maps to:" + for hn in ip_hn_map[ip]: + print " -", hn + print "=====================================================================" -# Convert JSON file containing DNS traffic to a map in which a hostname points to its set of associated IPs. +# Convert JSON file containing DNS traffic to a tuple with two maps. +# Index 0 of the tuple is a map in which a hostname points to its set of associated IPs. +# Index 1 of the tuple is a map in which an ip points to its set of associated hostnames. def parse_json(file_path): # Maps hostnames to IPs host_ip_mappings = defaultdict(set) + # Maps ips to hostnames + ip_host_mappings = defaultdict(set) with open(file_path) as jf: # Read JSON. 
# data becomes reference to root JSON object (or in our case json array) @@ -72,7 +90,9 @@ def parse_json(file_path): hostname = find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME]) # Add mapping of hostname to ip to our data structure host_ip_mappings[hostname].add(ip) - return host_ip_mappings + # Add mapping of ip to hostname to our data structure + ip_host_mappings[ip].add(hostname) + return (host_ip_mappings, ip_host_mappings) # Recursively traverse set of answers trying to find the top most alias for a canonical name def find_alias_hostname(answers, hostname): @@ -88,4 +108,19 @@ def find_alias_hostname(answers, hostname): return hostname if __name__ == '__main__': - main() \ No newline at end of file + main() + +# ================================================================================================ +# Notes/brainstorming how to do ip to host mappings. + +# Maps IPs to hostnames. Uses a dictionary of dictionaries. +# IP lookup in the outer dictionary returns a dictionary that has hostnames as keys. +# Looking up a hostname in the inner dictionary returns a set of timestamps. +# Each timestamp indicates the time at which the IP<->hostname mapping was determined by a DNS query. +# Note that the keyset of the inner dictionary will be of size 1 in most cases. +# When this is the case, the value (the set of timestamps) can be ignored. +# The values are only relevant when one IP maps to more than 1 hostname. +# When this is the case, the timestamps must be considered to find the most recent mapping. +# ip_host_mappings = defaultdict(lambda: defaultdict(set)) + +# ================================================================================================ \ No newline at end of file -- 2.34.1