From: rtrimana
Date: Thu, 23 Aug 2018 23:22:16 +0000 (-0700)
Subject: Adding plotting of points with centroid coordinates and frequencies.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=fe98963c15be4a8f3ab4cddafdeba5bf7c233bf9;p=pingpong.git

Adding plotting of points with centroid coordinates and frequencies.
---

diff --git a/python_ml/dlink_clustering.py b/python_ml/dlink_clustering.py
index badd5b1..82d445e 100644
--- a/python_ml/dlink_clustering.py
+++ b/python_ml/dlink_clustering.py
@@ -1,6 +1,45 @@
 from sklearn.cluster import KMeans
+import matplotlib.cm as cm
 import numpy as np
+import matplotlib.pyplot as plt
+
+# Create a subplot with 1 row and 2 columns
+fig, (ax2) = plt.subplots(1, 1)
+fig.set_size_inches(7, 7)
+
 X = np.array([[132, 192], [117, 960], [117, 962], [1343, 0], [117, 1109], [117, 1110], [117, 1111], [117, 1116], [117, 1117], [117, 1118], [117, 1119], [1015, 0], [117, 966]])
-kmeans = KMeans(n_clusters=5, random_state=0).fit(X)
-print(kmeans.labels_)
-print(kmeans.labels_.tolist().count(3))
+#kmeans = KMeans(n_clusters=5, random_state=0).fit(X)
+#print(kmeans.labels_)
+#print(kmeans.labels_.tolist().count(3))
+clusters = 5
+
+# Plot the data points based on the clusters
+clusterer = KMeans(n_clusters=clusters, random_state=10)
+cluster_labels = clusterer.fit_predict(X)
+# 2nd Plot showing the actual clusters formed
+colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
+ax2.scatter(X[:, 0], X[:, 1], marker='o', s=100, lw=0, alpha=0.3,
+            c=colors, edgecolor='k')
+
+# Labeling the clusters
+centers = clusterer.cluster_centers_
+# Label with cluster centers and frequencies
+for i, c in enumerate(centers):
+    mark = '[' + str(int(c[0])) + ', ' + str(int(c[1])) + ']' + ', ' + str(clusterer.labels_.tolist().count(i))
+    ax2.scatter(c[0], c[1], marker='$%s$' % mark, alpha=1, s=3000, edgecolor='k')
+
+# Draw white circles at cluster centers
+#ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
+#            c="white", alpha=1, s=200, edgecolor='k')
+
+#for i, c in enumerate(centers):
+#    ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
+#                s=50, edgecolor='k')
+#for i, c in enumerate(centers):
+#    print(c[0], c[1])
+
+ax2.set_title("The visualization of the clustered data.")
+ax2.set_xlabel("Feature space for the 1st feature")
+ax2.set_ylabel("Feature space for the 2nd feature")
+plt.show()
+
diff --git a/python_ml/silhouette.py b/python_ml/silhouette.py
index bf8c1eb..3ddca71 100644
--- a/python_ml/silhouette.py
+++ b/python_ml/silhouette.py
@@ -27,21 +27,21 @@ range_n_clusters = [2, 3, 4, 5, 6]
 
 for n_clusters in range_n_clusters:
     # Create a subplot with 1 row and 2 columns
-#    fig, (ax1, ax2) = plt.subplots(1, 2)
-#    fig.set_size_inches(18, 7)
+    fig, (ax1, ax2) = plt.subplots(1, 2)
+    fig.set_size_inches(18, 7)
 
     # The 1st subplot is the silhouette plot
     # The silhouette coefficient can range from -1, 1 but in this example all
     # lie within [-0.1, 1]
-#    ax1.set_xlim([-0.1, 1])
+    ax1.set_xlim([-0.1, 1])
     # The (n_clusters+1)*10 is for inserting blank space between silhouette
     # plots of individual clusters, to demarcate them clearly.
-#    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
+    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
 
     # Initialize the clusterer with n_clusters value and a random generator
     # seed of 10 for reproducibility.
-#    clusterer = KMeans(n_clusters=n_clusters, random_state=20)
-#    cluster_labels = clusterer.fit_predict(X)
+    clusterer = KMeans(n_clusters=n_clusters, random_state=10)
+    cluster_labels = clusterer.fit_predict(X)
 
     # The silhouette_score gives the average value for all the samples.
     # This gives a perspective into the density and separation of the formed
@@ -53,7 +53,7 @@ for n_clusters in range_n_clusters:
     # Compute the silhouette scores for each sample
     sample_silhouette_values = silhouette_samples(X, cluster_labels)
 
-'''    y_lower = 10
+    y_lower = 10
     for i in range(n_clusters):
         # Aggregate the silhouette scores for samples belonging to
         # cluster i, and sort them