From d7f125b817d20937c580b48286c712491223e878 Mon Sep 17 00:00:00 2001 From: rtrimana Date: Thu, 9 Nov 2017 08:51:16 -0800 Subject: [PATCH] Establishing basic flow for the complete graph processing --- base_gefx_generator.py | 122 +++++------------- devicelist.dat | 1 + .../extract_from_tshark.py | 0 3 files changed, 36 insertions(+), 87 deletions(-) rename extract_from_tshark.py => origin/extract_from_tshark.py (100%) diff --git a/base_gefx_generator.py b/base_gefx_generator.py index aa27905..4031e0c 100644 --- a/base_gefx_generator.py +++ b/base_gefx_generator.py @@ -38,13 +38,16 @@ JSON_KEY_ETH_DST = "eth.dst" JSON_KEY_IP = "ip" JSON_KEY_IP_SRC = "ip.src" JSON_KEY_IP_DST = "ip.dst" +# Checked protocols JSON_KEY_UDP = "udp" JSON_KEY_TCP = "tcp" -JSON_KEY_MDNS = "mdns" -JSON_KEY_BOOTP = "bootp" -JSON_KEY_SSDP = "ssdp" -JSON_KEY_DHCPV6 = "dhcpv6" -JSON_KEY_LLMNR = "llmnr" +# List of checked protocols +listchkprot = [ "bootp", + "dhcpv6", + "dns", + "llmnr", + "mdns", + "ssdp" ] def parse_json(file_path): @@ -66,7 +69,7 @@ def parse_json(file_path): # First parse the file once, constructing a map that contains information about individual devices' DNS resolutions. device_dns_mappings = parser.parse_dns.parse_json_dns(file_path) # "./json/eth1.dump.json" - + # Init empty graph G = nx.DiGraph() # Parse file again, this time constructing a graph of device<->server and device<->device communication. @@ -74,31 +77,19 @@ def parse_json(file_path): # Read JSON. # data becomes reference to root JSON object (or in our case json array) data = json.load(jf) + # Loop through json objects (packets) in data for p in data: # p is a JSON object, not an index # Drill down to object containing data from the different layers layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] - # Skip all MDNS traffic. - if JSON_KEY_MDNS in layers: - continue - - # Skip all LLMNR traffic. 
- if JSON_KEY_LLMNR in layers: - continue - - # Skip all SSDP traffic - we don't care about disovery, only the actual communication. - if JSON_KEY_SSDP in layers: - continue - - # Skip all bootp traffic (DHCP related) - if JSON_KEY_BOOTP in layers: - continue - - # Skip DHCPv6 for now. - if JSON_KEY_DHCPV6 in layers: - continue + iscontinue = False + for prot in listchkprot: + if prot in layers: + iscontinue = True + if iscontinue: + continue # Skip any non udp/non tcp traffic if JSON_KEY_UDP not in layers and JSON_KEY_TCP not in layers: @@ -118,22 +109,23 @@ def parse_json(file_path): ip_src = layers[JSON_KEY_IP][JSON_KEY_IP_SRC] ip_dst = layers[JSON_KEY_IP][JSON_KEY_IP_DST] - # ipre = re.compile(r'\b192.168.[0-9.]+') - # src_is_local = ipre.search(ip_src) - # dst_is_local = ipre.search(ip_dst) + # Categorize source and destination IP addresses: local vs. non-local + ipre = re.compile(r'\b192.168.[0-9.]+') + src_is_local = ipre.search(ip_src) + dst_is_local = ipre.search(ip_dst) print "ip.src =", ip_src, "ip.dst =", ip_dst - src_is_local = ip_src.startswith("192.168.") - dst_is_local = ip_dst.startswith("192.168.") src_node = None dst_node = None - if src_is_local: G.add_node(eth_src, Name=devlist[eth_src]) src_node = eth_src else: - # If the source is not local, then it's inbound traffic, and hence the eth_dst is the MAC of the IoT device. - hostname = device_dns_mappings[eth_dst].hostname_for_ip_at_time(ip_src, packet_timestamp) + hostname = None + # Check first if the key (eth_dst) exists in the dictionary + if eth_dst in device_dns_mappings: + # If the source is not local, then it's inbound traffic, and hence the eth_dst is the MAC of the IoT device. 
+ hostname = device_dns_mappings[eth_dst].hostname_for_ip_at_time(ip_src, packet_timestamp) if hostname is None: # Use IP if no hostname mapping hostname = ip_src @@ -143,8 +135,11 @@ def parse_json(file_path): G.add_node(eth_dst, Name=devlist[eth_src]) dst_node = eth_dst else: - # If the destination is not local, then it's outbound traffic, and hence the eth_src is the MAC of the IoT device. - hostname = device_dns_mappings[eth_src].hostname_for_ip_at_time(ip_dst, packet_timestamp) + hostname = None + # Check first if the key (eth_src) exists in the dictionary + if eth_src in device_dns_mappings: + # If the destination is not local, then it's outbound traffic, and hence the eth_src is the MAC of the IoT device. + hostname = device_dns_mappings[eth_src].hostname_for_ip_at_time(ip_dst, packet_timestamp) if hostname is None: # Use IP if no hostname mapping hostname = ip_dst @@ -152,58 +147,11 @@ def parse_json(file_path): dst_node = hostname G.add_edge(src_node, dst_node) -# # Traffic can be both outbound and inbound. -# # Determine which one of the two by looking up device MAC in DNS map. -# iot_device = None -# if eth_src in device_dns_mappings: -# iot_device = eth_src -# elif eth_dst in device_dns_mappings: -# iot_device = eth_dst -# else: -# # print "[ WARNING: DNS mapping not found for device with MAC", eth_src, "OR", eth_dst, "]" -# # This must be local communication between two IoT devices OR an IoT device talking to a hardcoded IP. -# # For now let's assume local communication. -# # Add a node for each device and an edge between them. -# G.add_node(eth_src, Name=devlist[eth_src]) -# G.add_node(eth_dst, Name=devlist[eth_src]) -# G.add_edge(eth_src, eth_dst) -# # TODO add regex check on src+dst IP to figure out if hardcoded server IP (e.g. check if one of the two are NOT a 192.168.x.y IP) -# continue -# # It is outbound traffic if iot_device matches src, otherwise it must be inbound traffic. 
-# outbound_traffic = iot_device == eth_src - - - -# ''' Graph construction ''' -# # No need to check if the Nodes and/or Edges we add already exist: -# # NetworkX won't add already existing nodes/edges (except in the case of a MultiGraph or MultiDiGraph (see NetworkX doc)). - -# # Add a node for each host. -# # First add node for IoT device. -# G.add_node(iot_device, Name=devlist[eth_src]) -# # Then add node for the server. -# # For this we need to distinguish between outbound and inbound traffic so that we look up the proper IP in our DNS map. -# # For outbound traffic, the server's IP is the destination IP. -# # For inbound traffic, the server's IP is the source IP. - -# server_ip = ip_dst if outbound_traffic else ip_src -# hostname = device_dns_mappings[iot_device].hostname_for_ip_at_time(server_ip, packet_timestamp) -# if hostname is None: -# # TODO this can occur when two local devices communicate OR if IoT device has hardcoded server IP. -# # However, we only get here for the DNS that have not performed any DNS lookups -# # We should use a regex check early in the loop to see if it is two local devices communicating. -# # This way we would not have to consider these corner cases later on. -# # print "[ WARNING: no ip-hostname mapping found for ip", server_ip, " -- adding eth.src->eth.dst edge, but note that this may be incorrect if IoT device has hardcoded server IP ]" -# G.add_node(eth_src, Name=devlist[eth_src]) -# G.add_node(eth_dst, Name=devlist[eth_src]) -# G.add_edge(eth_src, eth_dst) -# continue -# G.add_node(hostname) -# # Connect the two nodes we just added. 
-# if outbound_traffic: -# G.add_edge(iot_device, hostname) -# else: -# G.add_edge(hostname, iot_device) + # Print DNS mapping for reference + for mac in device_dns_mappings: + ddm = device_dns_mappings[mac] + ddm.print_mappings() + return G # ------------------------------------------------------ diff --git a/devicelist.dat b/devicelist.dat index 4f054fd..87eef4d 100644 --- a/devicelist.dat +++ b/devicelist.dat @@ -24,3 +24,4 @@ b0:b9:8a:73:69:8e, RouterPort_Bridge-LAN b0:b9:8a:73:69:8f, RouterPort_ETH1 b0:b9:8a:73:69:90, RouterPort_WLAN0 b0:b9:8a:73:69:91, RouterPort_WLAN1 +74:da:38:0d:05:55, RaspberryPi_Controller diff --git a/extract_from_tshark.py b/origin/extract_from_tshark.py similarity index 100% rename from extract_from_tshark.py rename to origin/extract_from_tshark.py -- 2.34.1