python_ml/plotting.py

   1 from sklearn.cluster import KMeans
   2 import matplotlib.cm as cm
   3 import numpy as np
   4 import matplotlib.pyplot as plt
   5
   6 # Create a subplot with 1 row and 2 columns
   7 fig, (ax2) = plt.subplots(1, 1)
   8 fig.set_size_inches(7, 7)
   9
  10
  11 # Read from file
  12 # TODO: Just change the following path and filename
  13 #       when needed to read from a different file
  14 path = "/scratch/July-2018/Pairs/"
  15 filename = "dlink-off.txt"
  16
  17 # Read and create an array of pairs
  18 with open(path + filename, "r") as pairs:
  19         pairsArr = []
  20         for line in pairs:
  21                 # We will see a pair and we need to split it into xpoint and ypoint
  22                 xpoint, ypoint = line.split(", ")
  23                 pair = [int(xpoint), int(ypoint)]
  24                 pairsArr.append(pair)
  25
  26 # Formed array of pairs
  27 #print(pairsArr)
  28 X = np.array(pairsArr);
  29
  30 clusters = 6
  31
  32 # Plot the data points based on the clusters
  33 clusterer = KMeans(n_clusters=clusters, random_state=10)
  34 cluster_labels = clusterer.fit_predict(X)
  35 # 2nd Plot showing the actual clusters formed
  36 colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
  37 ax2.scatter(X[:, 0], X[:, 1], marker='o', s=50, lw=0, alpha=0.3,
  38             c=colors, edgecolor='k')
  39
  40 # Labeling the clusters
  41 centers = clusterer.cluster_centers_
  42 # Label with cluster centers and frequencies
  43 for i, c in enumerate(centers):
  44         mark = '[' + str(int(c[0])) + ', ' + str(int(c[1])) + ']' + ', ' + str(clusterer.labels_.tolist().count(i))
  45         ax2.scatter(c[0], c[1], marker='$%s$' % mark, alpha=1, s=3000, edgecolor='k')
  46         print('[' + str(int(c[0])) + ', ' + str(int(c[1])) + ']' + ', ' + str(clusterer.labels_.tolist().count(i)))
  47
  48 ax2.set_title("The visualization of the clustered data.")
  49 ax2.set_xlabel("Feature space for the 1st feature")
  50 ax2.set_ylabel("Feature space for the 2nd feature")
  51 plt.show()