From 2f4cf75e2964b82e70ed590bcc5604e5970b657b Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Wed, 8 Nov 2017 15:04:01 -0800 Subject: [PATCH] Update base_gefx_generator.py to new pipeline --- base_gefx_generator.py | 198 ++++++++++++++++++++++++++++++----------- parser/parse_dns.py | 2 +- 2 files changed, 147 insertions(+), 53 deletions(-) diff --git a/base_gefx_generator.py b/base_gefx_generator.py index ca3aba7..aa27905 100644 --- a/base_gefx_generator.py +++ b/base_gefx_generator.py @@ -18,6 +18,7 @@ import tldextract import networkx as nx import sys import csv +import re from decimal import * import parser.parse_dns @@ -26,8 +27,25 @@ DEVICE_MAC_LIST = "devicelist.dat" COLUMN_MAC = "MAC_address" COLUMN_DEVICE_NAME = "device_name" + +JSON_KEY_SOURCE = "_source" +JSON_KEY_LAYERS = "layers" +JSON_KEY_FRAME = "frame" +JSON_KEY_FRAME_TIME_EPOCH = "frame.time_epoch" +JSON_KEY_ETH = "eth" JSON_KEY_ETH_SRC = "eth.src" JSON_KEY_ETH_DST = "eth.dst" +JSON_KEY_IP = "ip" +JSON_KEY_IP_SRC = "ip.src" +JSON_KEY_IP_DST = "ip.dst" +JSON_KEY_UDP = "udp" +JSON_KEY_TCP = "tcp" +JSON_KEY_MDNS = "mdns" +JSON_KEY_BOOTP = "bootp" +JSON_KEY_SSDP = "ssdp" +JSON_KEY_DHCPV6 = "dhcpv6" +JSON_KEY_LLMNR = "llmnr" + def parse_json(file_path): @@ -46,70 +64,146 @@ def parse_json(file_path): #for key, value in devlist.iteritems(): # print key + " => " + value - device_dns_mappings = parser.parse_dns.parse_json_dns("./json/dns.json") + # First parse the file once, constructing a map that contains information about individual devices' DNS resolutions. + device_dns_mappings = parser.parse_dns.parse_json_dns(file_path) # "./json/eth1.dump.json" # Init empty graph G = nx.DiGraph() + # Parse file again, this time constructing a graph of device<->server and device<->device communication. with open(file_path) as jf: # Read JSON. 
# data becomes reference to root JSON object (or in our case json array) data = json.load(jf) - # Loop through json objects in data - for k in data: - # Fetch timestamp of packet - packet_timestamp = Decimal(data[k]["ts"]) - # Fetch eth source and destination info - eth_src = data[k][JSON_KEY_ETH_SRC] - eth_dst = data[k][JSON_KEY_ETH_DST] - # Traffic can be both outbound and inbound. - # Determine which one of the two by looking up device MAC in DNS map. - iot_device = None - if eth_src in device_dns_mappings: - iot_device = eth_src - elif eth_dst in device_dns_mappings: - iot_device = eth_dst - else: -# print "[ WARNING: DNS mapping not found for device with MAC", eth_src, "OR", eth_dst, "]" - # This must be local communication between two IoT devices OR an IoT device talking to a hardcoded IP. - # For now let's assume local communication. - # Add a node for each device and an edge between them. - G.add_node(eth_src, Name=devlist[eth_src]) - G.add_node(eth_dst, Name=devlist[eth_src]) - G.add_edge(eth_src, eth_dst) - # TODO add regex check on src+dst IP to figure out if hardcoded server IP (e.g. check if one of the two are NOT a 192.168.x.y IP) + # Loop through json objects (packets) in data + for p in data: + # p is a JSON object, not an index + # Drill down to object containing data from the different layers + layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] + + # Skip all MDNS traffic. + if JSON_KEY_MDNS in layers: continue - # It is outbound traffic if iot_device matches src, otherwise it must be inbound traffic. - outbound_traffic = iot_device == eth_src - ''' Graph construction ''' - # No need to check if the Nodes and/or Edges we add already exist: - # NetworkX won't add already existing nodes/edges (except in the case of a MultiGraph or MultiDiGraph (see NetworkX doc)). - - # Add a node for each host. - # First add node for IoT device. - G.add_node(iot_device, Name=devlist[eth_src]) - # Then add node for the server. 
- # For this we need to distinguish between outbound and inbound traffic so that we look up the proper IP in our DNS map. - # For outbound traffic, the server's IP is the destination IP. - # For inbound traffic, the server's IP is the source IP. - server_ip = data[k]["dst_ip"] if outbound_traffic else data[k]["src_ip"] - hostname = device_dns_mappings[iot_device].hostname_for_ip_at_time(server_ip, packet_timestamp) - if hostname is None: - # TODO this can occur when two local devices communicate OR if IoT device has hardcoded server IP. - # However, we only get here for the DNS that have not performed any DNS lookups - # We should use a regex check early in the loop to see if it is two local devices communicating. - # This way we would not have to consider these corner cases later on. -# print "[ WARNING: no ip-hostname mapping found for ip", server_ip, " -- adding eth.src->eth.dst edge, but note that this may be incorrect if IoT device has hardcoded server IP ]" + # Skip all LLMNR traffic. + if JSON_KEY_LLMNR in layers: + continue + + # Skip all SSDP traffic - we don't care about discovery, only the actual communication. + if JSON_KEY_SSDP in layers: + continue + + # Skip all bootp traffic (DHCP related) + if JSON_KEY_BOOTP in layers: + continue + + # Skip DHCPv6 for now. 
+ if JSON_KEY_DHCPV6 in layers: + continue + + # Skip any non udp/non tcp traffic + if JSON_KEY_UDP not in layers and JSON_KEY_TCP not in layers: + continue + + # Fetch timestamp of packet (router's timestamp) + packet_timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH]) + print "timestamp", packet_timestamp + # Fetch source and destination MACs + eth = layers.get(JSON_KEY_ETH, None) + if eth is None: + print "[ WARNING: eth data not found ]" + continue + eth_src = eth.get(JSON_KEY_ETH_SRC, None) + eth_dst = eth.get(JSON_KEY_ETH_DST, None) + # And source and destination IPs + ip_src = layers[JSON_KEY_IP][JSON_KEY_IP_SRC] + ip_dst = layers[JSON_KEY_IP][JSON_KEY_IP_DST] + + # ipre = re.compile(r'\b192.168.[0-9.]+') + # src_is_local = ipre.search(ip_src) + # dst_is_local = ipre.search(ip_dst) + print "ip.src =", ip_src, "ip.dst =", ip_dst + src_is_local = ip_src.startswith("192.168.") + dst_is_local = ip_dst.startswith("192.168.") + + src_node = None + dst_node = None + + if src_is_local: G.add_node(eth_src, Name=devlist[eth_src]) + src_node = eth_src + else: + # If the source is not local, then it's inbound traffic, and hence the eth_dst is the MAC of the IoT device. + hostname = device_dns_mappings[eth_dst].hostname_for_ip_at_time(ip_src, packet_timestamp) + if hostname is None: + # Use IP if no hostname mapping + hostname = ip_src + G.add_node(hostname) + src_node = hostname + if dst_is_local: G.add_node(eth_dst, Name=devlist[eth_src]) - G.add_edge(eth_src, eth_dst) - continue - G.add_node(hostname) - # Connect the two nodes we just added. - if outbound_traffic: - G.add_edge(iot_device, hostname) + dst_node = eth_dst else: - G.add_edge(hostname, iot_device) + # If the destination is not local, then it's outbound traffic, and hence the eth_src is the MAC of the IoT device. 
+ hostname = device_dns_mappings[eth_src].hostname_for_ip_at_time(ip_dst, packet_timestamp) + if hostname is None: + # Use IP if no hostname mapping + hostname = ip_dst + G.add_node(hostname) + dst_node = hostname + G.add_edge(src_node, dst_node) + +# # Traffic can be both outbound and inbound. +# # Determine which one of the two by looking up device MAC in DNS map. +# iot_device = None +# if eth_src in device_dns_mappings: +# iot_device = eth_src +# elif eth_dst in device_dns_mappings: +# iot_device = eth_dst +# else: +# # print "[ WARNING: DNS mapping not found for device with MAC", eth_src, "OR", eth_dst, "]" +# # This must be local communication between two IoT devices OR an IoT device talking to a hardcoded IP. +# # For now let's assume local communication. +# # Add a node for each device and an edge between them. +# G.add_node(eth_src, Name=devlist[eth_src]) +# G.add_node(eth_dst, Name=devlist[eth_src]) +# G.add_edge(eth_src, eth_dst) +# # TODO add regex check on src+dst IP to figure out if hardcoded server IP (e.g. check if one of the two are NOT a 192.168.x.y IP) +# continue +# # It is outbound traffic if iot_device matches src, otherwise it must be inbound traffic. +# outbound_traffic = iot_device == eth_src + + + +# ''' Graph construction ''' +# # No need to check if the Nodes and/or Edges we add already exist: +# # NetworkX won't add already existing nodes/edges (except in the case of a MultiGraph or MultiDiGraph (see NetworkX doc)). + +# # Add a node for each host. +# # First add node for IoT device. +# G.add_node(iot_device, Name=devlist[eth_src]) +# # Then add node for the server. +# # For this we need to distinguish between outbound and inbound traffic so that we look up the proper IP in our DNS map. +# # For outbound traffic, the server's IP is the destination IP. +# # For inbound traffic, the server's IP is the source IP. 
+ +# server_ip = ip_dst if outbound_traffic else ip_src +# hostname = device_dns_mappings[iot_device].hostname_for_ip_at_time(server_ip, packet_timestamp) +# if hostname is None: +# # TODO this can occur when two local devices communicate OR if IoT device has hardcoded server IP. +# # However, we only get here for the DNS that have not performed any DNS lookups +# # We should use a regex check early in the loop to see if it is two local devices communicating. +# # This way we would not have to consider these corner cases later on. +# # print "[ WARNING: no ip-hostname mapping found for ip", server_ip, " -- adding eth.src->eth.dst edge, but note that this may be incorrect if IoT device has hardcoded server IP ]" +# G.add_node(eth_src, Name=devlist[eth_src]) +# G.add_node(eth_dst, Name=devlist[eth_src]) +# G.add_edge(eth_src, eth_dst) +# continue +# G.add_node(hostname) +# # Connect the two nodes we just added. +# if outbound_traffic: +# G.add_edge(iot_device, hostname) +# else: +# G.add_edge(hostname, iot_device) return G # ------------------------------------------------------ diff --git a/parser/parse_dns.py b/parser/parse_dns.py index 831b075..4b88337 100644 --- a/parser/parse_dns.py +++ b/parser/parse_dns.py @@ -125,7 +125,7 @@ def parse_json_dns(file_path): dns = layers.get(JSON_KEY_DNS, None) # Skip any non DNS traffic if dns is None: - print "[ WARNING: Non DNS traffic ]" + #print "[ WARNING: Non DNS traffic ]" continue # We only care about DNS responses as these also contain a copy of the query that they answer answers = dns.get(JSON_KEY_ANSWERS, None) -- 2.34.1