From 1416e59e7aab22305c694aadcb647c662c985bcb Mon Sep 17 00:00:00 2001 From: rtrimana Date: Fri, 2 Feb 2018 14:00:00 -0800 Subject: [PATCH] Removing bipartite_iot_web_gexf_generator.py; the bipartite feature has been merged into base_gexf_generator.py --- bipartite_iot_web_gexf_generator.py | 185 ---------------------------- 1 file changed, 185 deletions(-) delete mode 100644 bipartite_iot_web_gexf_generator.py diff --git a/bipartite_iot_web_gexf_generator.py b/bipartite_iot_web_gexf_generator.py deleted file mode 100644 index 6c5c62e..0000000 --- a/bipartite_iot_web_gexf_generator.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/python - -""" -Extension of base_gefx_generator.py. -This script constructs a bipartite graph with IoT devices on one side and Internet hosts on the other side. -As a result, this graph does NOT show inter IoT device communication. - -The input to this script is the Wirshark's/tshark's JSON representation of a packet trace. - -""" - -import socket -import json -import tldextract -import networkx as nx - -from networkx.algorithms import bipartite - -import sys -import csv -import re -from decimal import * - -import parser.parse_dns - -DEVICE_MAC_LIST = "devicelist.dat" -COLUMN_MAC = "MAC_address" -COLUMN_DEVICE_NAME = "device_name" - - -JSON_KEY_SOURCE = "_source" -JSON_KEY_LAYERS = "layers" -JSON_KEY_FRAME = "frame" -JSON_KEY_FRAME_TIME_EPOCH = "frame.time_epoch" -JSON_KEY_ETH = "eth" -JSON_KEY_ETH_SRC = "eth.src" -JSON_KEY_ETH_DST = "eth.dst" -JSON_KEY_IP = "ip" -JSON_KEY_IP_SRC = "ip.src" -JSON_KEY_IP_DST = "ip.dst" -JSON_KEY_UDP = "udp" -JSON_KEY_TCP = "tcp" -JSON_KEY_MDNS = "mdns" -JSON_KEY_BOOTP = "bootp" -JSON_KEY_SSDP = "ssdp" -JSON_KEY_DHCPV6 = "dhcpv6" -JSON_KEY_LLMNR = "llmnr" - - -def parse_json(file_path): - - # Open the device MAC list file - with open(DEVICE_MAC_LIST) as csvfile: - maclist = csv.DictReader(csvfile, (COLUMN_MAC, COLUMN_DEVICE_NAME)) - crudelist = list() - for item in maclist: - crudelist.append(item) - #print(item) - # Create key-value dictionary - devlist = dict() - for item in crudelist: - devlist[item[COLUMN_MAC]] = item[COLUMN_DEVICE_NAME] - - # First parse the file once, constructing a map that contains information about individual devices' DNS resolutions. - device_dns_mappings = parser.parse_dns.parse_json_dns(file_path) # "./json/eth1.dump.json" - - # Init empty graph - G = nx.DiGraph() - # Parse file again, this time constructing a graph of device<->server and device<->device communication. - with open(file_path) as jf: - # Read JSON. - # data becomes reference to root JSON object (or in our case json array) - data = json.load(jf) - # Loop through json objects (packets) in data - for p in data: - # p is a JSON object, not an index - # Drill down to object containing data from the different layers - layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] - - # Skip all MDNS traffic. - if JSON_KEY_MDNS in layers: - continue - - # Skip all LLMNR traffic. - if JSON_KEY_LLMNR in layers: - continue - - # Skip all SSDP traffic - we don't care about disovery, only the actual communication. - if JSON_KEY_SSDP in layers: - continue - - # Skip all bootp traffic (DHCP related) - if JSON_KEY_BOOTP in layers: - continue - - # Skip DHCPv6 for now. - if JSON_KEY_DHCPV6 in layers: - continue - - # Skip any non udp/non tcp traffic - if JSON_KEY_UDP not in layers and JSON_KEY_TCP not in layers: - continue - - # Skip any non IP traffic - if JSON_KEY_IP not in layers: - continue - - # Fetch timestamp of packet (router's timestamp) - packet_timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH]) - # Fetch source and destination MACs - eth = layers.get(JSON_KEY_ETH, None) - if eth is None: - print "[ WARNING: eth data not found ]" - continue - eth_src = eth.get(JSON_KEY_ETH_SRC, None) - eth_dst = eth.get(JSON_KEY_ETH_DST, None) - # And source and destination IPs - ip_src = layers[JSON_KEY_IP][JSON_KEY_IP_SRC] - ip_dst = layers[JSON_KEY_IP][JSON_KEY_IP_DST] - - src_is_local = ip_src.startswith("192.168.") - dst_is_local = ip_dst.startswith("192.168.") - - src_node = None - dst_node = None - - # Values for the 'bipartite' attribute of a node when constructing the bipartite graph - bipartite_iot = 0 - bipartite_web_server = 1 - - # Skip inter-IoT device communication. - if src_is_local and dst_is_local: - continue - - if src_is_local: - G.add_node(eth_src, Name=devlist[eth_src], bipartite=bipartite_iot) - src_node = eth_src - else: - # If the source is not local, then it's inbound traffic, and hence the eth_dst is the MAC of the IoT device. - hostname = None - # Guard against cases where a device does not perform DNS lookups (or the lookups occur before data collection starts) - if eth_dst in device_dns_mappings: - hostname = device_dns_mappings[eth_dst].hostname_for_ip_at_time(ip_src, packet_timestamp) - else: - print "[ WARNING: No entry for", eth_dst, "in DNS query map ]" - - if hostname is None: - # Use IP if no hostname mapping - hostname = ip_src - G.add_node(hostname, bipartite=bipartite_web_server) - src_node = hostname - if dst_is_local: - G.add_node(eth_dst, Name=devlist[eth_src], bipartite=bipartite_iot) - dst_node = eth_dst - else: - # If the destination is not local, then it's outbound traffic, and hence the eth_src is the MAC of the IoT device. - hostname = None - # Guard against cases where a device does not perform DNS lookups (or the lookups occur before data collection starts) - if eth_src in device_dns_mappings: - hostname = device_dns_mappings[eth_src].hostname_for_ip_at_time(ip_dst, packet_timestamp) - else: - print "[ WARNING: No entry for", eth_src, "in DNS query map ]" - if hostname is None: - # Use IP if no hostname mapping - hostname = ip_dst - G.add_node(hostname, bipartite=bipartite_web_server) - dst_node = hostname - G.add_edge(src_node, dst_node) - return G - -if __name__ == '__main__': - if len(sys.argv) < 3: - print "Usage:", sys.argv[0], "input_file output_file" - print "outfile_file should end in .gexf" - sys.exit(0) - # Input file: Path to Wireshark/tshark JSON file. - input_file = sys.argv[1] - print "[ input_file =", input_file, "]" - # Output file: Path to file where the Gephi XML should be written. - output_file = sys.argv[2] - print "[ output_file =", output_file, "]" - # Construct graph from JSON - G = parse_json(input_file) - # Write Graph in Graph Exchange XML format - nx.write_gexf(G, output_file) -- 2.34.1