1 from sklearn.cluster import DBSCAN
2 from sklearn import metrics
3 import matplotlib.cm as cm
5 import matplotlib.pyplot as plt
7 # Distance metric for clustering: compares two data points x and y.
# NOTE(review): the `def` line that binds x and y, and the statement executed
# when the check below succeeds, are not visible in this listing -- confirm both.
# NOTE(review): `math` is not among the imports visible at the top of this
# listing -- confirm it is imported.
# NOTE(review): the DBSCAN call visible further down passes no `metric=`
# argument -- confirm whether this function is still in use.
9 # Elements 2 and 3 of each data point are the markers derived from 'C' or 'S'; compare them first
10 if x[2] != y[2] or x[3] != y[3]:
11 # We are not going to cluster these two points together since they have different directions
14 # Same directions: Euclidean distance over elements 0 and 1 only
15 return math.sqrt((x[0] - y[0])**2 + (x[1] - y[1])**2)
# Create a figure holding a single Axes (1 row x 1 column) and size it to
# 20 x 20 inches so the per-point text annotations stay readable.
fig, ax2 = plt.subplots(nrows=1, ncols=1)
fig.set_size_inches(20, 20)
# TODO: Change `path` (and the device names below) to read the pair
# listings from a different directory or from different files.
path = "/scratch/July-2018/Pairs3/"
# TODO: Swap device1 and device2 to generate the diff plot in the opposite
# direction (the diff keeps entries of the first file that are missing from
# the second file).
device1 = "kwikset-off-phone-side"
device2 = "kwikset-on-phone-side"
# Each device's pairs are stored in "<device>.txt" under `path`.
filename1 = device1 + ".txt"
filename2 = device2 + ".txt"
37 # FIRST INPUT FILE (device1): parse path + filename1
38 # Read the file and build two lists: numeric points and labelled records
39 with open(path + filename1, "r") as pairs:
41 pairsSrcLabels1 = list()
# NOTE(review): the creation of pairsArr1 and the loop that binds `line`
# are not visible in this listing -- confirm both exist.
43 # Each line holds six fields separated by ", ": xpoint, ypoint, srcHost1, srcHost2, src1, src2
44 xpoint, ypoint, srcHost1, srcHost2, src1, src2 = line.split(", ")
# NOTE(review): src2 is the last field; if `line` still ends with a newline,
# src2 can never equal 'C' -- confirm the line is stripped before splitting.
45 # Assign 1000 for client ('C') and 0 for anything else to create distance between directions
46 src1Val = 1000 if src1 == 'C' else 0
47 src2Val = 1000 if src2 == 'C' else 0
48 pair = [int(xpoint), int(ypoint), int(src1Val), int(src2Val)]
49 pairSrc = [int(xpoint), int(ypoint), srcHost1, srcHost2, src1, src2]
50 # Array of actual points: [x, y, src1Val, src2Val]
51 pairsArr1.append(pair)
52 # Array of source labels: [x, y, srcHost1, srcHost2, src1, src2]
53 pairsSrcLabels1.append(pairSrc)
55 # SECOND INPUT FILE (device2): parse path + filename2
56 # Read the file and build two lists: numeric points and labelled records
57 with open(path + filename2, "r") as pairs:
59 pairsSrcLabels2 = list()
# NOTE(review): the creation of pairsArr2 and the loop that binds `line`
# are not visible in this listing -- confirm both exist.
61 # Each line holds six fields separated by ", ": xpoint, ypoint, srcHost1, srcHost2, src1, src2
62 xpoint, ypoint, srcHost1, srcHost2, src1, src2 = line.split(", ")
# NOTE(review): src2 is the last field; if `line` still ends with a newline,
# src2 can never equal 'C' -- confirm the line is stripped before splitting.
63 # Assign 1000 for client ('C') and 0 for anything else to create distance between directions
64 src1Val = 1000 if src1 == 'C' else 0
65 src2Val = 1000 if src2 == 'C' else 0
66 pair = [int(xpoint), int(ypoint), int(src1Val), int(src2Val)]
67 pairSrc = [int(xpoint), int(ypoint), srcHost1, srcHost2, src1, src2]
68 # Array of actual points: [x, y, src1Val, src2Val]
69 pairsArr2.append(pair)
70 # Array of source labels: [x, y, srcHost1, srcHost2, src1, src2]
71 pairsSrcLabels2.append(pairSrc)
# Difference "file 1 minus file 2": keep every entry read from the first
# file that does not occur anywhere in the second file. Order and duplicates
# of the first file are preserved.
#
# The entries are lists, so they are converted to tuples to allow constant-time
# set lookups instead of scanning the whole second list for every entry.
#
# The two differences are computed independently (points compare only the
# numeric fields, labels also compare the host fields), so diff12 and
# diff12SrcLabels are not guaranteed to have the same length.
_points_in_file2 = {tuple(entry) for entry in pairsArr2}
diff12 = [entry for entry in pairsArr1 if tuple(entry) not in _points_in_file2]

_labels_in_file2 = {tuple(entry) for entry in pairsSrcLabels2}
diff12SrcLabels = [entry for entry in pairsSrcLabels1
                   if tuple(entry) not in _labels_in_file2]
# Cluster the points in X with DBSCAN.
#   eps         = maximum distance between two samples for one to be
#                 considered in the neighbourhood of the other
#   min_samples = number of samples (the point itself included) that must lie
#                 in a point's neighbourhood for it to be a core point
db = DBSCAN(eps=10, min_samples=trig - 45).fit(X)
# Boolean mask with one entry per sample: True where DBSCAN reported the
# sample as a core sample, False everywhere else.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
# Distinct cluster labels; DBSCAN uses -1 for noise.
unique_labels = set(labels)

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(unique_labels) - (1 if -1 in unique_labels else 0)

# One colour per distinct label, spread evenly over the Spectral colormap.
# Black is not part of this palette; it is reserved for noise.
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]
# Plot the samples of each label k: core samples as large markers, the
# remaining samples with that label as smaller markers.
94 for k, col in zip(unique_labels, colors):
# Face colour used for the core-sample markers below: RGBA (1, 0, 0, 1)
95 cluster_col = [1, 0, 0, 1]
97 # Black used for noise.
# NOTE(review): the statements that test for the noise label and switch the
# colour to black are not visible in this listing -- confirm them.
# Boolean mask selecting the samples whose label equals k
100 class_member_mask = (labels == k)
102 # print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k)))
# Core samples with label k: size-10 markers filled with cluster_col
103 xy = X[class_member_mask & core_samples_mask]
104 plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(cluster_col),
105 markeredgecolor='k', markersize=10)
# Non-core samples with label k: size-6 markers filled with the palette colour col
107 xy = X[class_member_mask & ~core_samples_mask]
108 plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
109 markeredgecolor='k', markersize=6)
# Annotate a plotted point with its coordinates.
# NOTE(review): the loop that binds `pair` and `count`, the `else:` separating
# the two plt.text calls below, and the update of `count` are not visible in
# this listing -- confirm them.
114 if labels[count] == -1:
# Label -1: print the coordinates only
115 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10)
117 # Only print the frequency (number of points sharing this point's label) when this is a real cluster
118 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
119 " - Freq:" + str(labels.tolist().count(labels[count])), fontsize=10)
# NOTE(review): labels.tolist().count(...) rescans every label on each call;
# if this runs once per point, precomputing the counts once would avoid that.
# Print source-destination labels
for pair in diff12SrcLabels:
    # Write the two source markers (fields 4 and 5 of the labelled record,
    # joined as "<src1>-><src2>") at the point's x/y position.
    plt.text(pair[0], pair[1], str(pair[4]) + "->" + str(pair[5]))

# Title states which device listing was diffed against which.
plt.title(device1 + ' - diff - ' + device2)