1 /* =============================================================================
5 * =============================================================================
9 * Takes as input a file, containing 1 data point per per line, and performs a
10 * fuzzy c-means clustering on the data. Fuzzy clustering is performed using
11 * min to max clusters and the clustering that gets the best score according to
12 * a compactness and separation criterion are returned.
18 * James Cook University of North Queensland. Australia.
19 * email: mccane@cs.jcu.edu.au
24 * Jay Pisharath, Wei-keng Liao
25 * Northwestern University
32 * University of California, Irvine
34 * =============================================================================
36 * For the license of kmeans, please see kmeans/LICENSE.kmeans
38 * ------------------------------------------------------------------------
40 * Unless otherwise noted, the following license applies to STAMP files:
42 * Copyright (c) 2007, Stanford University
43 * All rights reserved.
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions are
49 * * Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
52 * * Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in
54 * the documentation and/or other materials provided with the
57 * * Neither the name of Stanford University nor the names of its
58 * contributors may be used to endorse or promote products derived
59 * from this software without specific prior written permission.
61 * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
62 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
64 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
66 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
67 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
69 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
71 * THE POSSIBILITY OF SUCH DAMAGE.
73 * =============================================================================
76 public class Cluster {
82 /* =============================================================================
84 * =============================================================================
87 extractMoments (float []data, int num_elts, int num_moments)
89 float[] moments = new float[num_moments];
91 for (int i = 0; i < num_elts; i++) {
92 moments[0] += data[i];
95 moments[0] = moments[0] / num_elts;
96 for (int j = 1; j < num_moments; j++) {
98 for (int i = 0; i < num_elts; i++) {
99 moments[j] += Math.pow((data[i]-moments[0]), j+1);
101 moments[j] = moments[j] / num_elts;
107 /* =============================================================================
109 * =============================================================================
112 zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */
117 float[] single_variable = new float[numObjects];
118 for (int i = 0; i < numAttributes; i++) {
119 for (int j = 0; j < numObjects; j++) {
120 single_variable[j] = data[j][i];
122 moments = extractMoments(single_variable, numObjects, 2);
123 moments[1] = (float) Math.sqrt((double)moments[1]);
124 for (int j = 0; j < numObjects; j++) {
125 data[j][i] = (data[j][i]-moments[0])/moments[1];
131 /* =============================================================================
133 * =============================================================================
137 int nthreads, /* in: number of threads*/
138 int numObjects, /* number of input objects */
139 int numAttributes, /* size of attribute of each object */
140 float[][] attributes, /* [numObjects][numAttributes] */
141 KMeans kms, /* KMeans class hold the inputs and outputs */
142 GlobalArgs args /* Global thread arguments */
148 float[][] tmp_cluster_centres = null;
149 int[] membership = new int[numObjects];
151 Random randomPtr = new Random();
152 randomPtr = randomPtr.random_alloc(randomPtr);
154 if (kms.use_zscore_transform == 1) {
155 zscoreTransform(attributes, numObjects, numAttributes);
161 * From min_nclusters to max_nclusters, find best_nclusters
163 for (nclusters = kms.min_nclusters; nclusters <= kms.max_nclusters; nclusters++) {
165 randomPtr.random_seed(randomPtr, 7);
166 args.nclusters = nclusters;
168 Normal norm = new Normal();
170 tmp_cluster_centres = norm.normal_exec(nthreads,
181 kms.cluster_centres = tmp_cluster_centres;
182 kms.best_nclusters = nclusters;
192 /* =============================================================================
194 * End of cluster.java
196 * =============================================================================