1 /* =============================================================================
5 * =============================================================================
9 * Takes as input a file, containing 1 data point per per line, and performs a
10 * fuzzy c-means clustering on the data. Fuzzy clustering is performed using
11 * min to max clusters and the clustering that gets the best score according to
12 * a compactness and separation criterion are returned.
18 * James Cook University of North Queensland. Australia.
19 * email: mccane@cs.jcu.edu.au
24 * Jay Pisharath, Wei-keng Liao
25 * Northwestern University
32 * University of California, Irvine
34 * =============================================================================
36 * For the license of kmeans, please see kmeans/LICENSE.kmeans
38 * ------------------------------------------------------------------------
40 * Unless otherwise noted, the following license applies to STAMP files:
42 * Copyright (c) 2007, Stanford University
43 * All rights reserved.
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions are
49 * * Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
52 * * Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in
54 * the documentation and/or other materials provided with the
57 * * Neither the name of Stanford University nor the names of its
58 * contributors may be used to endorse or promote products derived
59 * from this software without specific prior written permission.
61 * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
62 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
64 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
66 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
67 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
69 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
71 * THE POSSIBILITY OF SUCH DAMAGE.
73 * =============================================================================
76 public class Cluster {
82 /* =============================================================================
84 * =============================================================================
87 extractMoments (float []data, int num_elts, int num_moments)
89 float[] moments = new float[num_moments];
92 for (int i = 0; i < num_elts; i++) {
94 moments[0] += data[i];
97 moments[0] = moments[0] / num_elts;
98 for (int j = 1; j < num_moments; j++) {
100 for (int i = 0; i < num_elts; i++) {
101 moments[j] += (float) Math.pow((data[i]-moments[0]), j+1);
103 moments[j] = moments[j] / num_elts;
109 /* =============================================================================
111 * =============================================================================
114 zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */
119 float[] single_variable = new float[numObjects];
120 for (int i = 0; i < numAttributes; i++) {
121 for (int j = 0; j < numObjects; j++) {
122 single_variable[j] = data[j][i];
124 moments = extractMoments(single_variable, numObjects, 2);
125 moments[1] = (float) Math.sqrt((double)moments[1]);
126 for (int j = 0; j < numObjects; j++) {
127 data[j][i] = (data[j][i]-moments[0])/moments[1];
131 single_variable=null;
135 /* =============================================================================
137 * =============================================================================
141 int nthreads, /* in: number of threads*/
142 int numObjects, /* number of input objects */
143 int numAttributes, /* size of attribute of each object */
144 float[][] attributes, /* [numObjects][numAttributes] */
145 KMeans kms, /* KMeans class hold the inputs and outputs */
146 GlobalArgs args /* Global thread arguments */
152 float[][] tmp_cluster_centres;
153 int[] membership = new int[numObjects];
155 Random randomPtr = new Random();
156 randomPtr.random_alloc();
158 if (kms.use_zscore_transform == 1) {
159 zscoreTransform(attributes, numObjects, numAttributes);
165 * From min_nclusters to max_nclusters, find best_nclusters
167 System.out.println("min_nclusters= " + kms.min_nclusters + " max_nclusters= " + kms.max_nclusters);
168 for (nclusters = kms.min_nclusters; nclusters <= kms.max_nclusters; nclusters++) {
170 randomPtr.random_seed(7);
171 args.nclusters = nclusters;
173 Normal norm = new Normal();
175 tmp_cluster_centres = norm.normal_exec(nthreads,
186 kms.cluster_centres = tmp_cluster_centres;
187 kms.best_nclusters = nclusters;
195 /* =============================================================================
197 * End of cluster.java
199 * =============================================================================