+++ /dev/null
-#!/usr/bin/python\r
-\r
-"""\r
-Script used to extract only the needed information from JSON packet traces generated by\r
-tshark from PCAPNG format\r
-"""\r
-\r
-import os, sys\r
-import json\r
-import uuid\r
-\r
-from collections import OrderedDict\r
-\r
# Top-level keys in tshark's JSON export of a capture.
json_key_source = "_source"
json_key_layers = "layers"

# Protocol-layer keys.
json_key_ip = "ip"
json_key_tcp = "tcp"

# HTTP-layer key and the sub-keys extracted from it into the output.
json_key_http = "http"
json_key_method = "method"
json_key_uri = "uri"
json_key_headers = "headers"
json_key_host = "host"

# Derived keys for HTTP request fields ("http.request.method", ...).
json_key_http_req = json_key_http + ".request."
json_key_http_req_method = json_key_http_req + json_key_method
json_key_http_req_uri = json_key_http_req + json_key_uri
json_key_http_req_line = json_key_http_req + "line"

json_key_pkt_comment = "pkt_comment"

# Frame metadata keys; the frame number is used in warning messages and
# the epoch timestamp is copied into each output packet as "ts".
json_key_frame = "frame"
json_key_frame_num = json_key_frame + ".number"
json_key_frame_comment = json_key_frame + ".comment"
json_key_frame_ts = json_key_frame + ".time_epoch"


# Ethernet-layer keys; MAC addresses let traffic be mapped back to a
# physical device.
JSON_KEY_ETH = "eth"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_ETH_DST = "eth.dst"
-\r
-\r
def make_unique(key, dct):
    """Return a variant of *key* that is not already present in *dct*.

    The key is returned unchanged when it does not collide; otherwise a
    numeric suffix is appended ("key_1", "key_2", ...) until an unused
    candidate is found.
    """
    if key not in dct:
        return key
    suffix = 1
    candidate = '{}_{}'.format(key, suffix)
    while candidate in dct:
        suffix += 1
        candidate = '{}_{}'.format(key, suffix)
    return candidate
-\r
-\r
def parse_object_pairs(pairs):
    """object_pairs_hook for json.JSONDecoder that keeps duplicate keys.

    Colliding keys are disambiguated with a numeric suffix ("key_1",
    "key_2", ...) so no data point is dropped; insertion order is
    preserved by using an OrderedDict.
    """
    result = OrderedDict()
    for key, value in pairs:
        if key in result:
            # Find the first unused suffixed variant of this key.
            n = 0
            candidate = key
            while candidate in result:
                n += 1
                candidate = '{}_{}'.format(key, n)
            key = candidate
        result[key] = value
    return result
-\r
def _extract_http_packet(layers, frame_num):
    """Build the slimmed-down packet dict for one HTTP-over-TCP/IP packet.

    *layers* is the tshark "layers" object for the packet and *frame_num*
    a pre-formatted " Frame: N" string used in warning messages.  Returns
    the new packet dict, or None when a required field is missing (a
    warning naming the frame is printed in that case).
    """
    new_packet = {}

    # TCP/IP endpoint info; src is included as well so the device that
    # initiates the traffic can be identified.
    new_packet["dst_ip"] = layers[json_key_ip][json_key_ip + ".dst"]
    new_packet["dst_port"] = int(layers[json_key_tcp][json_key_tcp + ".dstport"])
    new_packet["src_ip"] = layers[json_key_ip][json_key_ip + ".src"]
    new_packet["src_port"] = int(layers[json_key_tcp][json_key_tcp + ".srcport"])

    # Ethernet source/destination so traffic can be mapped to a physical
    # device via its MAC address.
    new_packet[JSON_KEY_ETH_SRC] = layers[JSON_KEY_ETH][JSON_KEY_ETH_SRC]
    new_packet[JSON_KEY_ETH_DST] = layers[JSON_KEY_ETH][JSON_KEY_ETH_DST]

    # Go through all HTTP fields and extract the ones that are needed.
    http_data = layers[json_key_http]
    for http_key in http_data:
        http_value = http_data[http_key]

        if http_key.startswith(json_key_http_req_line):
            # Header lines look like "Name: value"; split on the first colon.
            header_line = http_value.split(":", 1)
            if len(header_line) != 2:
                print("WARNING: could not parse header '" + str(header_line) + "'"
                      + frame_num)
                continue

            # Prepare container for HTTP headers.
            if json_key_headers not in new_packet:
                new_packet[json_key_headers] = {}

            # Lower-case header keys to stay consistent with our other data;
            # strip the trailing carriage return from the value.
            header_key = header_line[0].lower()
            header_val = header_line[1].strip()
            new_packet[json_key_headers][header_key] = header_val

            # The host header is also promoted to the main object.
            if header_key == json_key_host:
                new_packet[json_key_host] = header_val

        if json_key_http_req_method in http_value:
            new_packet[json_key_method] = http_value[json_key_http_req_method]
        if json_key_http_req_uri in http_value:
            new_packet[json_key_uri] = http_value[json_key_http_req_uri]

    # Check that we found the minimum needed HTTP fields.
    if (json_key_uri not in new_packet or json_key_method not in new_packet or
            json_key_host not in new_packet):
        print("Missing some HTTP Headers!" + frame_num)
        return None

    # Extract timestamp; a packet without one is dropped.
    if json_key_frame_ts not in layers[json_key_frame]:
        print("WARNING: could not find timestamp!" + frame_num)
        return None
    new_packet["ts"] = layers[json_key_frame][json_key_frame_ts]

    return new_packet


def _convert_file(full_path):
    """Re-format one tshark JSON trace file in place.

    Reads the whole file, extracts the needed TCP/IP + HTTP information
    from every packet, and overwrites the file with the re-formatted JSON
    (one uuid-keyed entry per kept packet).
    """
    print(full_path)
    with open(full_path, "r+") as jf:
        # Certain JSON 'keys' appear multiple times in our data, so they must
        # be made unique first (regular json.load() would lose data points).
        # From:
        # https://stackoverflow.com/questions/29321677/python-json-parser-allow-duplicate-keys
        decoder = json.JSONDecoder(object_pairs_hook=parse_object_pairs)
        pcap_data = decoder.decode(jf.read())

        # Prepare new data structure for re-formatted JSON storage.
        data = {}
        for packet in pcap_data:
            layers = packet[json_key_source][json_key_layers]

            # All captured traffic should have a frame + frame number, but
            # check anyway; fall back to -1 for error reporting.
            frame_num = " Frame: "
            if (json_key_frame not in layers or
                    json_key_frame_num not in layers[json_key_frame]):
                print("WARNING: could not find frame number! Using -1...")
                frame_num = frame_num + "-1"
            else:
                frame_num = frame_num + layers[json_key_frame][json_key_frame_num]

            # All captured traffic should be IP, but check anyway.
            if json_key_ip not in layers:
                print("WARNING: Non-IP traffic detected!" + frame_num)
                continue

            # For now, focus on HTTP over TCP only.
            if json_key_tcp not in layers or json_key_http not in layers:
                continue

            new_packet = _extract_http_packet(layers, frame_num)
            if new_packet is None:
                continue

            # Unique key per packet, consistent with ReCon and safe in case
            # packets end up in different files.
            data[str(uuid.uuid4())] = new_packet

        # Write the new data back over the original file.
        jf.seek(0)
        jf.write(json.dumps(data, sort_keys=True, indent=4))
        jf.truncate()


def change_file(fpath):
    """Recursively re-format every tshark JSON trace file under *fpath*.

    Directories are descended into; every regular file found is converted
    in place by _convert_file().
    """
    for fn in os.listdir(fpath):
        full_path = os.path.join(fpath, fn)
        if os.path.isdir(full_path):
            # Recursively go through all directories.
            change_file(full_path)
        else:
            _convert_file(full_path)
-\r
if __name__ == '__main__':
    # Guard against a missing argument: exit with a usage message instead
    # of a cryptic IndexError traceback.
    if len(sys.argv) != 2:
        sys.exit("Usage: {} <json-trace-directory>".format(sys.argv[0]))
    change_file(sys.argv[1])
\ No newline at end of file