import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
# Create a single 7x7 inch figure holding one axes (a 1x1 subplot grid)
fig, ax2 = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))
# TODO: Just change the following path and filename
# when needed to read from a different file
path = "/scratch/July-2018/Pairs/"
filename = "dlink-off.txt"

# Read the file and build a list of [x, y] integer pairs, one per line.
# Each non-blank line must contain two integers separated by ", ".
pairsArr = []
with open(path + filename, "r") as pairs:
    for line in pairs:
        # Skip blank lines (e.g. an empty line at the end of the file),
        # which would otherwise fail to unpack into two values
        if not line.strip():
            continue
        # Split the pair into its x and y components; int() tolerates the
        # trailing newline left on ypoint
        xpoint, ypoint = line.split(", ")
        pair = [int(xpoint), int(ypoint)]
        pairsArr.append(pair)

# Formed array of pairs, one row per pair read from the file
X = np.array(pairsArr)
# Fit k-means on the pairs and get the cluster index assigned to each point.
# NOTE(review): `clusters` (the number of clusters) is not defined in the
# visible part of this script -- confirm it is set before this point.
clusterer = KMeans(random_state=10, n_clusters=clusters)
cluster_labels = clusterer.fit_predict(X)

# Scatter plot of the points, coloured by cluster: each label is divided by
# the cluster count and the result is mapped through the nipy_spectral colormap
label_fractions = cluster_labels.astype(float) / clusters
colors = cm.nipy_spectral(label_fractions)
ax2.scatter(
    X[:, 0],
    X[:, 1],
    c=colors,
    marker='o',
    s=50,
    alpha=0.3,
    lw=0,
    edgecolor='k',
)
# Labeling the clusters
centers = clusterer.cluster_centers_
# Number of points assigned to each cluster, computed once up front instead
# of converting the label array to a list and scanning it for every center
counts = np.bincount(clusterer.labels_, minlength=len(centers))
# Label each center with its coordinates (truncated to int) and its frequency
for i, c in enumerate(centers):
    mark = '[{}, {}], {}'.format(int(c[0]), int(c[1]), counts[i])
    # The label text itself is drawn as the marker at the center's position
    ax2.scatter(c[0], c[1], marker='$%s$' % mark, alpha=1, s=3000, edgecolor='k')
    print(mark)
# Title and axis captions for the cluster plot, applied in a single call
ax2.set(
    title="The visualization of the clustered data.",
    xlabel="Feature space for the 1st feature",
    ylabel="Feature space for the 2nd feature",
)