From 0d574b6e2d0a00639b07d0f10dc40f27def955e8 Mon Sep 17 00:00:00 2001
From: rtrimana <rtrimana@uci.edu>
Date: Mon, 10 Sep 2018 15:32:59 -0700
Subject: [PATCH] Fixing scripts and adding a script to create diff plot
 between two lists of datapoints.

---
 python_ml/plotting-dbscan-diff.py | 96 +++++++++++++++++++++++++++++++
 python_ml/plotting-dbscan.py      | 16 ++++--
 2 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 python_ml/plotting-dbscan-diff.py

diff --git a/python_ml/plotting-dbscan-diff.py b/python_ml/plotting-dbscan-diff.py
new file mode 100644
index 0000000..ba89a15
--- /dev/null
+++ b/python_ml/plotting-dbscan-diff.py
@@ -0,0 +1,96 @@
+from sklearn.cluster import DBSCAN
+from sklearn import metrics
+import matplotlib.cm as cm
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Create a figure with a single subplot (1 row, 1 column)
+fig, (ax2) = plt.subplots(1, 1)
+fig.set_size_inches(7, 7)
+
+# Read from file
+# TODO: Just change the following path and filename 
+# 	when needed to read from a different file
+path = "/scratch/July-2018/Pairs2/"
+# TODO: Swap device1 and device2 below to generate
+#		the diff plot in the opposite direction
+device1 = "dlink-siren-device-off"
+device2 = "dlink-siren-device-on"
+filename1 = device1 + ".txt"
+filename2 = device2 + ".txt"
+plt.ylim(0, 2000)
+plt.xlim(0, 2000)
+
+# Number of triggers
+trig = 50
+
+# READING PAIRS FOR THE FIRST DEVICE EVENT (filename1)
+# Read and create an array of pairs
+with open(path + filename1, "r") as pairs:
+	pairsArr1 = list()
+	for line in pairs:
+		# We will see a pair and we need to split it into xpoint and ypoint
+		xpoint, ypoint = line.split(", ")
+		pair = [int(xpoint), int(ypoint)]
+		pairsArr1.append(pair)
+
+# READING PAIRS FOR THE SECOND DEVICE EVENT (filename2)
+# Read and create an array of pairs
+with open(path + filename2, "r") as pairs:
+	pairsArr2 = list()
+	for line in pairs:
+		# We will see a pair and we need to split it into xpoint and ypoint
+		xpoint, ypoint = line.split(", ")
+		pair = [int(xpoint), int(ypoint)]
+		pairsArr2.append(pair)
+
+diff12 = [i for i in pairsArr1 if i not in pairsArr2]
+
+X = np.array(diff12);
+
+# Compute DBSCAN
+# eps = maximum distance between two points to be treated as neighbors
+# min_samples = minimum number of neighbors (incl. itself) for a core point
+db = DBSCAN(eps=10, min_samples=trig - 45).fit(X)
+core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
+core_samples_mask[db.core_sample_indices_] = True
+labels = db.labels_
+
+# Number of clusters in labels, ignoring noise if present.
+n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+
+# Black removed and is used for noise instead.
+unique_labels = set(labels)
+
+colors = [plt.cm.Spectral(each)
+	      for each in np.linspace(0, 1, len(unique_labels))]
+for k, col in zip(unique_labels, colors):
+	if k == -1:
+	    # Black used for noise.
+	    col = [0, 0, 0, 1]
+
+	class_member_mask = (labels == k)
+
+	# print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k)))
+	xy = X[class_member_mask & core_samples_mask]
+	plt.plot(xy[:, 0], xy[:, 1], 'o',
+	         markeredgecolor='k', markersize=10)
+
+	xy = X[class_member_mask & ~core_samples_mask]
+	plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+	         markeredgecolor='k', markersize=6)
+
+count = 0
+for pair in diff12:
+	if labels[count] == -1:
+		plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10)
+	else:
+	# Only print the frequency when this is a real cluster
+		plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + 
+			" - Freq:" + str(labels.tolist().count(labels[count])), fontsize=10)
+	count = count + 1
+
+plt.title(device1 + ' - diff - ' + device2)
+plt.show()
+
+
diff --git a/python_ml/plotting-dbscan.py b/python_ml/plotting-dbscan.py
index bdfc1d5..9b5d1ab 100644
--- a/python_ml/plotting-dbscan.py
+++ b/python_ml/plotting-dbscan.py
@@ -13,8 +13,10 @@ fig.set_size_inches(7, 7)
 # TODO: Just change the following path and filename 
 # 	when needed to read from a different file
 path = "/scratch/July-2018/Pairs2/"
-device = "alexa2-off"
+device = "dlink-siren-device-off"
 filename = device + ".txt"
+plt.ylim(0, 2000)
+plt.xlim(0, 2000)
 
 # Number of triggers
 trig = 50
@@ -35,7 +37,9 @@ X = np.array(pairsArr);
 # Compute DBSCAN
 # eps = distances
 # min_samples = minimum number of members of a cluster
-db = DBSCAN(eps=20, min_samples=trig - 5).fit(X)
+#db = DBSCAN(eps=20, min_samples=trig - 5).fit(X)
+# TODO: min_samples lowered from trig - 5 to trig - 45 just to see more clusters
+db = DBSCAN(eps=20, min_samples=trig - 45).fit(X)
 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
 core_samples_mask[db.core_sample_indices_] = True
 labels = db.labels_
@@ -43,13 +47,13 @@ labels = db.labels_
 # Number of clusters in labels, ignoring noise if present.
 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
 
-print('Estimated number of clusters: %d' % n_clusters_)
+#print('Estimated number of clusters: %d' % n_clusters_)
 
 import matplotlib.pyplot as plt
 
 # Black removed and is used for noise instead.
 unique_labels = set(labels)
-print("Labels: " + str(labels))
+#print("Labels: " + str(labels))
 
 colors = [plt.cm.Spectral(each)
           for each in np.linspace(0, 1, len(unique_labels))]
@@ -79,11 +83,11 @@ for pair in pairsArr:
 	else:
 	# Only print the frequency when this is a real cluster
 		plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + 
-			"\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10)
+			" - Freq: " + str(labels.tolist().count(labels[count])), fontsize=10)
 	count = count + 1
 
 	
-plt.title(device + ' - Estimated number of clusters: %d' % n_clusters_)
+plt.title(device + ' - Clusters: %d' % n_clusters_)
 plt.show()
 
 
-- 
2.34.1