From 0d574b6e2d0a00639b07d0f10dc40f27def955e8 Mon Sep 17 00:00:00 2001
From: rtrimana
Date: Mon, 10 Sep 2018 15:32:59 -0700
Subject: [PATCH] Fixing scripts and adding a script to create diff plot between two lists of datapoints.

---
Reviewer notes (not part of the commit message):
* plotting-dbscan-diff.py: the comment "Create a subplot with 1 row and 2
  columns" sits above plt.subplots(1, 1), which requests a 1x1 grid.
* plotting-dbscan-diff.py: both file-reading blocks are headed "PLOTTING FOR
  DEVICE ON EVENT", although the first one reads device1
  ("dlink-siren-device-off").
* plotting-dbscan-diff.py: the core-sample plt.plot call passes no
  markerfacecolor, while the non-core call passes tuple(col); confirm that
  this difference is intended.
* plotting-dbscan-diff.py: "i not in pairsArr2" scans a list once per element
  of pairsArr1, and labels.tolist().count(...) is recomputed for every
  clustered point; both costs grow quadratically with the input size.
* The frequency label reads " - Freq:" in plotting-dbscan-diff.py but
  " - Freq: " in plotting-dbscan.py.
* NOTE(review): if every pair of device1 also occurs in device2, diff12 is
  empty; presumably DBSCAN.fit(X) rejects such an array -- confirm and add a
  guard if needed.

 python_ml/plotting-dbscan-diff.py | 96 +++++++++++++++++++++++++++++++
 python_ml/plotting-dbscan.py      | 16 ++++--
 2 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 python_ml/plotting-dbscan-diff.py

diff --git a/python_ml/plotting-dbscan-diff.py b/python_ml/plotting-dbscan-diff.py
new file mode 100644
index 0000000..ba89a15
--- /dev/null
+++ b/python_ml/plotting-dbscan-diff.py
@@ -0,0 +1,96 @@
+from sklearn.cluster import DBSCAN
+from sklearn import metrics
+import matplotlib.cm as cm
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Create a subplot with 1 row and 2 columns
+fig, (ax2) = plt.subplots(1, 1)
+fig.set_size_inches(7, 7)
+
+# Read from file
+# TODO: Just change the following path and filename
+# when needed to read from a different file
+path = "/scratch/July-2018/Pairs2/"
+# TODO: Change the order of the files below to generate
+# the diff plot reversedly
+device1 = "dlink-siren-device-off"
+device2 = "dlink-siren-device-on"
+filename1 = device1 + ".txt"
+filename2 = device2 + ".txt"
+plt.ylim(0, 2000)
+plt.xlim(0, 2000)
+
+# Number of triggers
+trig = 50
+
+# PLOTTING FOR DEVICE ON EVENT
+# Read and create an array of pairs
+with open(path + filename1, "r") as pairs:
+    pairsArr1 = list()
+    for line in pairs:
+        # We will see a pair and we need to split it into xpoint and ypoint
+        xpoint, ypoint = line.split(", ")
+        pair = [int(xpoint), int(ypoint)]
+        pairsArr1.append(pair)
+
+# PLOTTING FOR DEVICE ON EVENT
+# Read and create an array of pairs
+with open(path + filename2, "r") as pairs:
+    pairsArr2 = list()
+    for line in pairs:
+        # We will see a pair and we need to split it into xpoint and ypoint
+        xpoint, ypoint = line.split(", ")
+        pair = [int(xpoint), int(ypoint)]
+        pairsArr2.append(pair)
+
+diff12 = [i for i in pairsArr1 if i not in pairsArr2]
+
+X = np.array(diff12);
+
+# Compute DBSCAN
+# eps = distances
+# min_samples = minimum number of members of a cluster
+db = DBSCAN(eps=10, min_samples=trig - 45).fit(X)
+core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
+core_samples_mask[db.core_sample_indices_] = True
+labels = db.labels_
+
+# Number of clusters in labels, ignoring noise if present.
+n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+
+# Black removed and is used for noise instead.
+unique_labels = set(labels)
+
+colors = [plt.cm.Spectral(each)
+          for each in np.linspace(0, 1, len(unique_labels))]
+for k, col in zip(unique_labels, colors):
+    if k == -1:
+        # Black used for noise.
+        col = [0, 0, 0, 1]
+
+    class_member_mask = (labels == k)
+
+    # print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k)))
+    xy = X[class_member_mask & core_samples_mask]
+    plt.plot(xy[:, 0], xy[:, 1], 'o',
+             markeredgecolor='k', markersize=10)
+
+    xy = X[class_member_mask & ~core_samples_mask]
+    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+             markeredgecolor='k', markersize=6)
+
+count = 0
+for pair in diff12:
+    if labels[count] == -1:
+        plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10)
+    else:
+        # Only print the frequency when this is a real cluster
+        plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
+                 " - Freq:" + str(labels.tolist().count(labels[count])), fontsize=10)
+    count = count + 1
+
+plt.title(device1 + ' - diff - ' + device2)
+plt.show()
+
+
diff --git a/python_ml/plotting-dbscan.py b/python_ml/plotting-dbscan.py
index bdfc1d5..9b5d1ab 100644
--- a/python_ml/plotting-dbscan.py
+++ b/python_ml/plotting-dbscan.py
@@ -13,8 +13,10 @@ fig.set_size_inches(7, 7)
 # TODO: Just change the following path and filename
 # when needed to read from a different file
 path = "/scratch/July-2018/Pairs2/"
-device = "alexa2-off"
+device = "dlink-siren-device-off"
 filename = device + ".txt"
+plt.ylim(0, 2000)
+plt.xlim(0, 2000)
 
 # Number of triggers
 trig = 50
@@ -35,7 +37,9 @@ X = np.array(pairsArr);
 # Compute DBSCAN
 # eps = distances
 # min_samples = minimum number of members of a cluster
-db = DBSCAN(eps=20, min_samples=trig - 5).fit(X)
+#db = DBSCAN(eps=20, min_samples=trig - 5).fit(X)
+# TODO: This is just for seeing more clusters
+db = DBSCAN(eps=20, min_samples=trig - 45).fit(X)
 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
 core_samples_mask[db.core_sample_indices_] = True
 labels = db.labels_
@@ -43,13 +47,13 @@ labels = db.labels_
 
 # Number of clusters in labels, ignoring noise if present.
 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
-print('Estimated number of clusters: %d' % n_clusters_)
+#print('Estimated number of clusters: %d' % n_clusters_)
 
 import matplotlib.pyplot as plt
 
 # Black removed and is used for noise instead.
 unique_labels = set(labels)
-print("Labels: " + str(labels))
+#print("Labels: " + str(labels))
 
 colors = [plt.cm.Spectral(each)
           for each in np.linspace(0, 1, len(unique_labels))]
@@ -79,11 +83,11 @@ for pair in pairsArr:
     else:
         # Only print the frequency when this is a real cluster
         plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
-                 "\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10)
+                 " - Freq: " + str(labels.tolist().count(labels[count])), fontsize=10)
     count = count + 1
 
 
-plt.title(device + ' - Estimated number of clusters: %d' % n_clusters_)
+plt.title(device + ' - Clusters: %d' % n_clusters_)
 plt.show()
 
 
-- 
2.34.1