From: rtrimana
Date: Thu, 23 Aug 2018 23:22:16 +0000 (-0700)
Subject: Adding plotting of points with centroid coordinates and frequencies.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=fe98963c15be4a8f3ab4cddafdeba5bf7c233bf9;p=pingpong.git

Adding plotting of points with centroid coordinates and frequencies.
---

diff --git a/python_ml/dlink_clustering.py b/python_ml/dlink_clustering.py
index badd5b1..82d445e 100644
--- a/python_ml/dlink_clustering.py
+++ b/python_ml/dlink_clustering.py
@@ -1,6 +1,45 @@
 from sklearn.cluster import KMeans
+import matplotlib.cm as cm
 import numpy as np
+import matplotlib.pyplot as plt
+
+# Create a subplot with 1 row and 2 columns
+fig, (ax2) = plt.subplots(1, 1)
+fig.set_size_inches(7, 7)
+
 X = np.array([[132, 192], [117, 960], [117, 962], [1343, 0], [117, 1109], [117, 1110], [117, 1111], [117, 1116], [117, 1117], [117, 1118], [117, 1119], [1015, 0], [117, 966]])
-kmeans = KMeans(n_clusters=5, random_state=0).fit(X)
-print(kmeans.labels_)
-print(kmeans.labels_.tolist().count(3))
+#kmeans = KMeans(n_clusters=5, random_state=0).fit(X)
+#print(kmeans.labels_)
+#print(kmeans.labels_.tolist().count(3))
+clusters = 5
+
+# Plot the data points based on the clusters
+clusterer = KMeans(n_clusters=clusters, random_state=10)
+cluster_labels = clusterer.fit_predict(X)
+# 2nd Plot showing the actual clusters formed
+colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
+ax2.scatter(X[:, 0], X[:, 1], marker='o', s=100, lw=0, alpha=0.3,
+            c=colors, edgecolor='k')
+
+# Labeling the clusters
+centers = clusterer.cluster_centers_
+# Label with cluster centers and frequencies
+for i, c in enumerate(centers):
+    mark = '[' + str(int(c[0])) + ', ' + str(int(c[1])) + ']' + ', ' + str(clusterer.labels_.tolist().count(i))
+    ax2.scatter(c[0], c[1], marker='$%s$' % mark, alpha=1, s=3000, edgecolor='k')
+
+# Draw white circles at cluster centers
+#ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
+#            c="white", alpha=1, s=200, edgecolor='k')
+
+#for i, c in enumerate(centers):
+#    ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
+#                s=50, edgecolor='k')
+#for i, c in enumerate(centers):
+#    print(c[0], c[1])
+
+ax2.set_title("The visualization of the clustered data.")
+ax2.set_xlabel("Feature space for the 1st feature")
+ax2.set_ylabel("Feature space for the 2nd feature")
+plt.show()
+
diff --git a/python_ml/silhouette.py b/python_ml/silhouette.py
index bf8c1eb..3ddca71 100644
--- a/python_ml/silhouette.py
+++ b/python_ml/silhouette.py
@@ -27,21 +27,21 @@ range_n_clusters = [2, 3, 4, 5, 6]
 
 for n_clusters in range_n_clusters:
     # Create a subplot with 1 row and 2 columns
-#    fig, (ax1, ax2) = plt.subplots(1, 2)
-#    fig.set_size_inches(18, 7)
+    fig, (ax1, ax2) = plt.subplots(1, 2)
+    fig.set_size_inches(18, 7)
 
     # The 1st subplot is the silhouette plot
     # The silhouette coefficient can range from -1, 1 but in this example all
     # lie within [-0.1, 1]
-#    ax1.set_xlim([-0.1, 1])
+    ax1.set_xlim([-0.1, 1])
     # The (n_clusters+1)*10 is for inserting blank space between silhouette
     # plots of individual clusters, to demarcate them clearly.
-#    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
+    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
 
     # Initialize the clusterer with n_clusters value and a random generator
     # seed of 10 for reproducibility.
-#    clusterer = KMeans(n_clusters=n_clusters, random_state=20)
-#    cluster_labels = clusterer.fit_predict(X)
+    clusterer = KMeans(n_clusters=n_clusters, random_state=10)
+    cluster_labels = clusterer.fit_predict(X)
 
     # The silhouette_score gives the average value for all the samples.
     # This gives a perspective into the density and separation of the formed
@@ -53,7 +53,7 @@ for n_clusters in range_n_clusters:
     # Compute the silhouette scores for each sample
     sample_silhouette_values = silhouette_samples(X, cluster_labels)
 
-'''    y_lower = 10
+    y_lower = 10
     for i in range(n_clusters):
         # Aggregate the silhouette scores for samples belonging to
         # cluster i, and sort them