--- /dev/null
+/* =============================================================================
+ *
+ * cluster.java
+ *
+ * =============================================================================
+ *
+ * Description:
+ *
+ * Takes as input a file, containing 1 data point per per line, and performs a
+ * fuzzy c-means clustering on the data. Fuzzy clustering is performed using
+ * min to max clusters and the clustering that gets the best score according to
+ * a compactness and separation criterion are returned.
+ *
+ *
+ * Author:
+ *
+ * Brendan McCane
+ * James Cook University of North Queensland. Australia.
+ * email: mccane@cs.jcu.edu.au
+ *
+ *
+ * Edited by:
+ *
+ * Jay Pisharath, Wei-keng Liao
+ * Northwestern University
+ *
+ * Chi Cao Minh
+ * Stanford University
+ *
+ * Ported to Java:
+ * Alokika Dash
+ * University of California, Irvine
+ *
+ * =============================================================================
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+*
+* =============================================================================
+*/
+public class Cluster {
+ // Flags
+ flag startLoop;
+ flag setKMeans;
+ flag toendLoop;
+ flag resetKMeans;
+ flag finish;
+
+ flag setStartZ;
+ flag startZ;
+
+ /**
+ * Numbers of loops to perform
+ */
+ int nloops;
+
+ /*
+ * Flag to indicate if nloops has been changes
+ */
+ boolean changeNLoops;
+
+ /**
+ * User input for max clusters
+ **/
+ int max_nclusters;
+
+ /**
+ * User input for min clusters
+ **/
+ int min_nclusters;
+
+ /**
+ * Using zscore transformation for cluster center
+ * deviating from distribution's mean
+ **/
+ int use_zscore_transform;
+
+ /**
+ * Total number of threads
+ **/
+ int nthreads;
+
+ /**
+ * threshold until which kmeans cluster continues
+ **/
+ float threshold;
+
+ /**
+ * List of attributes
+ **/
+ public float[][] feature;
+
+ /**
+ * Number of attributes per Object
+ **/
+ public int nfeatures;
+
+ /**
+ * Number of Objects
+ **/
+ public int npoints;
+
+ /**
+ * Since zscore transform may perform in cluster() which modifies the
+ * contents of attributes[][], we need to re-store the originals each time
+ * starting a loop.
+ */
+ public float[][] attributes;
+
+ /**
+ *
+ */
+ MyRandom randomPtr;
+
+ /*
+ * Number of clusters to compute
+ */
+ int nclusters;
+
+ /*
+ * Flag indicating if nclusters has been changed
+ */
+ boolean changeNClusters;
+
+ /*
+ * Counter for aggregation of KMeans
+ */
+ int counterKMS;
+
+ /*
+ *
+ */
+ intwrapper[] new_centers_len;
+ float[][] new_centers;
+
+ /*
+ *
+ */
+ float delta;
+
+ /*
+ * Counter for finished inner loops
+ */
+ int counterILoop;
+
+ /*
+ * Flag for initialization before setting KMeans
+ */
+ boolean isSet4KMeans;
+
+ /**
+ * Output: Number of best clusters
+ **/
+ int best_nclusters;
+
+ /**
+ * Output: Cluster centers
+ **/
+ float[][] cluster_centres;
+
+ public Cluster(int nloops,
+ int max_nclusters,
+ int min_nclusters,
+ float threshold,
+ int use_zscore_transform,
+ int nthreads,
+ int numAttributes,
+ int numObjects,
+ float[][] buf) {
+ this.nloops = nloops;
+ this.max_nclusters = max_nclusters;
+ this.min_nclusters = min_nclusters;
+ this.threshold = threshold;
+ this.use_zscore_transform = use_zscore_transform;
+ this.nthreads = nthreads;
+ this.nfeatures = numAttributes;
+ this.npoints = numObjects;
+ this.feature = buf;
+ this.attributes = new float[this.npoints][this.nfeatures];
+ this.randomPtr = new MyRandom();
+ randomPtr.random_alloc();
+ this.nclusters = 0;
+ this.changeNClusters = false;
+ this.counterILoop = 0;
+ this.counterKMS = 0;
+ this.new_centers_len = null;
+ this.new_centers = null;
+ this.best_nclusters = 0;
+ this.cluster_centres = null;
+ this.isSet4KMeans = false;
+ }
+
+ /*
+ *
+ */
+ public boolean initialize() {
+ this.nclusters = this.min_nclusters;
+ this.changeNClusters = false;
+ this.changeNLoops = false;
+ this.counterILoop = 0;
+ this.counterKMS = 0;
+ this.new_centers_len = null;
+ this.new_centers = null;
+
+ float[][] attributes = this.attributes;
+ float[][] feature = this.feature;
+ int npoints = this.npoints;
+ int nfeatures = this.nfeatures;
+ /*
+ * Since zscore transform may perform in cluster() which modifies the
+ * contents of attributes[][], we need to re-store the originals
+ */
+ for(int x = 0; x < npoints; x++) {
+ for(int y = 0; y < nfeatures; y++) {
+ attributes[x][y] = feature[x][y];
+ }
+ }
+
+ if(this.use_zscore_transform == 1) {
+ zscoreTransform(attributes, npoints, nfeatures);
+ }
+ }
+
+ /* =============================================================================
+ * extractMoments
+ * =============================================================================
+ */
+ public static float[] extractMoments (float []data,
+ int num_elts,
+ int num_moments) {
+ float[] moments = new float[num_moments];
+
+ float mzero=0.0f;
+ for (int i = 0; i < num_elts; i++) {
+ mzero += data[i];
+ }
+
+ moments[0] = mzero / num_elts;
+ for (int j = 1; j < num_moments; j++) {
+ moments[j] = 0;
+ for (int i = 0; i < num_elts; i++) {
+ moments[j] += (float) Math.pow((data[i]-moments[0]), j+1);
+ }
+ moments[j] = moments[j] / num_elts;
+ }
+ return moments;
+ }
+
+ /* =============================================================================
+ * zscoreTransform
+ * =============================================================================
+ */
+ public static void zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */
+ int numObjects,
+ int numAttributes) {
+ float[] moments;
+ float[] single_variable = new float[numObjects];
+ for (int i = 0; i < numAttributes; i++) {
+ for (int j = 0; j < numObjects; j++) {
+ single_variable[j] = data[j][i];
+ }
+ moments = extractMoments(single_variable, numObjects, 2);
+ moments[1] = (float) Math.sqrt((double)moments[1]);
+ for (int j = 0; j < numObjects; j++) {
+ data[j][i] = (data[j][i]-moments[0])/moments[1];
+ }
+ }
+ }
+
+ public void resetCounterKMS() {
+ this.counterKMS = 0;
+ }
+
+ public void incCounterKMS() {
+ this.counterKMS++;
+ }
+
+ public boolean isCounterKMSFull() {
+ return (this.counterKMS == this.nthreads);
+ }
+
+ public void resetCounterILoop() {
+ this.counterILoop = 0;
+ }
+
+ public boolean isILoopFinish() {
+ return !((this.delta > this.threshold) && (this.counterILoop < 500 + 1));
+ }
+
+ public void setBestClusters() {
+ this.best_nclusters = this.nclusters;
+ }
+
+ public void decNLoops() {
+ if(!this.changeNLoops) {
+ this.nloops--;
+ this.changeNLoops = true;
+ }
+ }
+
+ public boolean checkNLoops() {
+ return (this.nloops > 0);
+ }
+
+ public void resetChangeNLoops() {
+ this.changeNLoops = false;
+ }
+
+ public void incNClusters() {
+ if(!this.changeNClusters) {
+ this.nclusters++;
+ this.changeNClusters = true;
+ }
+ }
+
+ public boolean checkNClusters() {
+ return (this.nclusters <= this.max_nclusters);
+ }
+
+ public void resetChangeNClusters() {
+ this.changeNClusters = false;
+ }
+
+ public void resetIsSet4KMeans() {
+ this.isSet4KMeans = false;
+ }
+
+ public void setKMeans(KMeans kms) {
+ if(!this.isSet4KMeans) {
+ this.randomPtr.random_seed(7);
+ this.new_centers_len = new intwrapper[this.nclusters];
+ this.new_centers = new float[this.nclusters][this.nfeatures];
+ this.cluster_centres = new float[this.nclusters][this.nfeatures];
+ float[][] cluster_centres = this.cluster_centres;
+ float[][] attributes = this.attributes;
+ int npoints = this.npoints;
+ int nclusters = this.nclusters;
+ int nfeatures = this.nfeatures;
+ MyRandom randomPtr = this.randomPtr;
+ /* Randomly pick cluster centers */
+ for (int i = 0; i < nclusters; i++) {
+ int n = (int)(randomPtr.random_generate() % npoints);
+ for (int j = 0; j < nfeatures; j++) {
+ cluster_centres[i][j] = attributes[n][j];
+ }
+ }
+ this.delta = (float) 0.0;
+ this.isSet4KMeans = true;
+ }
+
+ this.innerKMeansSetting(kms);
+ }
+
+ public void innerKMeansSetting(KMeans kms) {
+ kms.setFeature(this.attributes);
+ kms.setClusters(this.nclusters, this.cluster_centres);
+ }
+
+ public void resetDelta() {
+ this.delta = (float) 0.0;
+ }
+
+ public void mergeKMeans(KMeans kms) {
+ int nclusters = this.nclusters;
+ int nfeatures = this.nfeatures;
+ float[][] new_centers = this.new_centers;
+ float[][] kmsnew_centers = kms.new_centers;
+ intwrapper[] new_centers_len = this.new_centers_len;
+ intwrapper[] kmsnew_centers_len = kms.new_centers_len;
+
+ this.delta += kms.delta;
+ for (int i = 0; i < nclusters; i++) {
+ for (int j = 0; j < nfeatures; j++) {
+ new_centers[i][j] += kmsnew_centers[i][j];
+ kmsnew_centers[i][j] = (float)0.0; /* set back to 0 */
+ }
+ new_centers_len[i] += kmsnew_centers_len[i];
+ kmsnew_centers_len[i] = 0; /* set back to 0 */
+ }
+
+ if(this.isCounterKMSFull()) {
+ // finish one inner loop
+ this.counterILoop++;
+
+ float[][] cluster_centres = this.cluster_centres;
+ /* Replace old cluster centers with new_centers */
+ for (int i = 0; i < nclusters; i++) {
+ for (int j = 0; j < nfeatures; j++) {
+ if (new_centers_len[i] > 0) {
+ cluster_centres[i][j] = new_centers[i][j] / new_centers_len[i];
+ }
+ new_centers[i][j] = (float)0.0; /* set back to 0 */
+ }
+ new_centers_len[i] = 0; /* set back to 0 */
+ }
+
+ this.delta /= this.npoints;
+ }
+ }
+}
+/* =============================================================================
+ *
+ * End of cluster.java
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * common.java
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+public class Common {
+
+ public Common() {
+ }
+
+
+ /* =============================================================================
+ * common_euclidDist2
+ * -- multi-dimensional spatial Euclid distance square
+ * =============================================================================
+ */
+ public static float
+ common_euclidDist2 (float[] pt1, float[] pt2, int numdims)
+ {
+ int i;
+ float ans = 0.0f;
+
+ for (i = 0; i < numdims; i++) {
+ ans += (pt1[i] - pt2[i]) * (pt1[i] - pt2[i]);
+ }
+
+ return ans;
+ }
+
+
+ /* =============================================================================
+ * common_findNearestPoint
+ * =============================================================================
+ */
+ public static int
+ common_findNearestPoint (float[] pt, /* [nfeatures] */
+ int nfeatures,
+ float[][] pts, /* [npts][nfeatures] */
+ int npts)
+ {
+ int index = 0;
+ int i;
+ //double max_dist = FLT_MAX;
+ float max_dist = (float)3.40282347e+38;
+ float limit = (float) 0.99999;
+
+ /* Find the cluster center id with min distance to pt */
+ for (i = 0; i < npts; i++) {
+ float dist = common_euclidDist2(pt, pts[i], nfeatures); /* no need square root */
+ if ((dist / max_dist) < limit) {
+ max_dist = dist;
+ index = i;
+ if (max_dist == 0) {
+ break;
+ }
+ }
+ }
+
+ return index;
+ }
+}
+
+
+/* =============================================================================
+ *
+ * End of common.java
+ *
+ * =============================================================================
+ */
--- /dev/null
+public class KMeans {
+ // Flags
+ flag init;
+ flag run;
+ flag turnin;
+ flag reset;
+
+ /**
+ * thread id
+ **/
+ public int threadid;
+
+ /**
+ * Total number of threads
+ **/
+ int nthreads;
+
+ // Read-only memebers
+ /**
+ * List of attributes
+ **/
+ public float[][] feature;
+
+ /**
+ * Number of attributes per Object
+ **/
+ public int nfeatures;
+
+ /**
+ * Number of Objects
+ **/
+ public int npoints;
+
+ /**
+ *
+ **/
+ public float[][] clusters;
+
+ /**
+ * Iteration id between min_nclusters to max_nclusters
+ **/
+ public int nclusters;
+
+ // Writable members
+ /**
+ *
+ */
+ public float delta;
+
+ /**
+ * Array that holds change index of cluster center per thread
+ **/
+ public int[] membership;
+
+ // Local copy members, writable
+ /**
+ * Number of points in each cluster [nclusters]
+ **/
+ public intwrapper[] new_centers_len;
+
+ /**
+ * New centers of the clusters [nclusters][nfeatures]
+ **/
+ public float[][] new_centers;
+
+ public KMeans(int threadid,
+ int nthreads,
+ int nfeatures,
+ int npoints,
+ int nclusters,
+ float[][] feature,
+ float[][] clusters) {
+ this.threadid = threadid;
+ this.nthreads = nthreads;
+ this.nfeatures = nfeatures;
+ this.npoints = npoints;
+ this.nclusters = nclusters;
+ this.feature = feature;
+ this.clusters = clusters;
+ this.delta = (float)0.0;
+ this.membership = new int[this.npoints / this.nthreads];
+ this.new_centers_len = null;
+ this.new_centers = null;
+ }
+
+ public void setFeature(float[][] feature){
+ this.feature = feature;
+ }
+
+ public void setClusters(int nclusters,
+ float[][] clusters){
+ this.nclusters = nclusters;
+ this.clusters = clusters;
+ }
+
+ public void init() {
+ int[] membership = this.membership;
+ int length = this.membership.length;
+ for (int i = 0; i < length; i++) {
+ membership[i] = -1;
+ }
+ this.new_centers_len = new intwrapper[this.nclusters];
+ this.new_centers = new float[this.nclusters][this.nfeatures];
+ }
+
+ public void run() {
+ float delta = (float) 0.0;
+
+ float[][] feature = this.feature;
+ int nfeatures = this.nfeatures;
+ int npoints = this.npoints;
+ int nclusters = this.nclusters;
+ int[] membership = this.membership;
+ float[][] clusters = this.clusters;
+ intwrapper[] new_centers_len = this.new_centers_len;
+ float[][] new_centers = this.new_centers;
+ int index, start, span;
+
+ start = this.threadid;
+ span = this.nthreads;
+
+ int k = 0;
+ //System.out.println("myId= " + myId + " start= " + start + " npoints= " + npoints);
+ for (int i = start; i < npoints; i += span) {
+ index = Common.common_findNearestPoint(feature[i],
+ nfeatures,
+ clusters,
+ nclusters);
+ /*
+ * If membership changes, increase delta by 1.
+ * membership[i] cannot be changed by other threads
+ */
+ if (membership[k] != index) {
+ delta += 1.0f;
+ }
+
+ /* Assign the membership to object i */
+ /* membership[i] can't be changed by other thread */
+ membership[k] = index;
+
+ /* Update new cluster centers : sum of objects located within */
+ new_centers_len[index] = new_centers_len[index] + 1;
+ for (int j = 0; j < nfeatures; j++) {
+ new_centers[index][j] = new_centers[index][j] + feature[i][j];
+ }
+ k++;
+ }
+ this.delta = delta;
+ }
+}
\ No newline at end of file
--- /dev/null
+/** Bamboo version
+ *
+ * @author jzhou
+ *
+ */
+/* =============================================================================
+ *
+ * kmeans.java
+ *
+ * =============================================================================
+ *
+ * Description:
+ *
+ * Takes as input a file:
+ * ascii file: containing 1 data point per line
+ * binary file: first int is the number of objects
+ * 2nd int is the no. of features of each object
+ *
+ * This example performs a fuzzy c-means clustering on the data. Fuzzy clustering
+ * is performed using min to max clusters and the clustering that gets the best
+ * score according to a compactness and separation criterion are returned.
+ *
+ *
+ * Author:
+ *
+ * Wei-keng Liao
+ * ECE Department Northwestern University
+ * email: wkliao@ece.northwestern.edu
+ *
+ *
+ * Edited by:
+ *
+ * Jay Pisharath
+ * Northwestern University
+ *
+ * Chi Cao Minh
+ * Stanford University
+ *
+ * Port to Java version
+ * Alokika Dash
+ * University of California, Irvine
+ *
+ * =============================================================================
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+task t1(StartupObject s{initialstate}) {
+ //System.printString("task t1\n");
+
+ // Benchmark settings
+ int max_nclusters = 40;
+ int min_nclusters = 40;
+ float threshold = (float)0.00001;
+ int use_zscore_transform = 1;
+ int nthreads = 62;
+ int nloops = 1;
+
+ // Initialize object data
+ int numRow = nthreads * 120;
+ int numDim = 16 * 2; //5;
+ int numAttributes = numDim;
+ int numObjects = numRow; // should be times of nthreads
+ float[][] buf = new float[numObjects][numAttributes];
+ // create object info here
+ long seed = 0;
+ Random rand = new Random(seed);
+ if(false) {
+ // Clustered random using gaussian
+ /*int numCenter = 16;
+ int maxInt = (~0x1) + 1;
+ float[][] centers = new float[numCenter][numDim];
+ for(int i = 0; i < numCenter; i++) {
+ for(int j = 0; j < numDim; j++) {
+ centers[i][j] = (float)rand.nextInt() / maxInt;
+ }
+ }
+ //float sigma = ((float)1.0 / numCenter) * ((float)1.0 / numCenter) * ((float)1.0 / numCenter);
+ for(int i = 0; i < numRow; i++) {
+ //buf[i][0] = numRow;
+ int ranI = Math.abs(rand.nextInt()) % numCenter;
+ float[] center = centers[ranI];
+ for(int j = 0; j < numDim; j++) {
+ float noise = (float)rand.nextGaussian();
+ buf[i][j] = center[j] + noise;
+ }
+ }*/
+ } else {
+ // uniform random
+ int maxInt = (~0x1) + 1;
+ for(int i = 0; i < numRow; i++) {
+ //buf[i][0] = numRow;
+ for(int j = 0; j < numDim; j++) {
+ buf[i][j] = (float)rand.nextInt() / maxInt;
+ //buf[i][j] = (float)(i * 20000 + j * 50000) / maxInt;
+ }
+ }
+ }
+
+ Cluster cluster = new Cluster(nloops,
+ max_nclusters,
+ min_nclusters,
+ threshold,
+ use_zscore_transform,
+ nthreads,
+ numAttributes,
+ numObjects,
+ buf) {startLoop};
+
+ /* Create parallel parts */
+ for(int i = 0; i<nthreads; i++) {
+ KMeans kms = new KMeans(i,
+ nthreads,
+ numAttributes,
+ numObjects,
+ 0,
+ null,
+ null) {init}; // TODO
+ }
+
+ taskexit(s{!initialstate});
+}
+
+task t2(Cluster cluster{startLoop}) {
+ //System.printString("task t2\n");
+
+ cluster.initialize();
+
+ taskexit(cluster{!startLoop, setKMeans});
+}
+
+task t3(Cluster cluster{setKMeans}, KMeans kms{init}) {
+ //System.printString("task t3\n");
+
+ cluster.incCounterKMS();
+ cluster.setKMeans(kms);
+ kms.init();
+
+ if(cluster.isCounterKMSFull()) {
+ // Merged all KMeans
+ cluster.resetCounterKMS();
+ cluster.resetIsSet4KMeans();
+ taskexit(cluster{!setKMeans, toendLoop}, kms{!init, run});
+ } else {
+ taskexit(kms{!init, run});
+ }
+}
+
+task t4(KMeans kms{run}) {
+ //System.printString("task t4\n");
+
+ kms.run();
+
+ taskexit(kms{!run, turnin});
+}
+
+task t5(Cluster cluster{toendLoop}, KMeans kms{turnin}) {
+ //System.printString("task t5\n");
+
+ cluster.incCounterKMS();
+ cluster.mergeKMeans(kms);
+
+ if(cluster.isCounterKMSFull()) {
+ // Merged all KMeans
+ cluster.resetCounterKMS();
+ taskexit(cluster{!toendLoop, resetKMeans}, kms{!turnin, reset});
+ } else {
+ taskexit(kms{!turnin, reset});
+ }
+}
+
+task t6(Cluster cluster{resetKMeans}, KMeans kms{reset}) {
+ //System.printString("task t6\n");
+
+ cluster.incCounterKMS();
+ boolean isILoopFinish = cluster.isILoopFinish();
+ if(isILoopFinish) {
+ // Inner loop finished
+ cluster.setBestClusters();
+ cluster.incNClusters();
+ if(!cluster.checkNClusters()) {
+ // Outer loop also finished
+ cluster.decNLoops();
+ }
+ } else{
+ // Go on for annother inner loop, goto t4
+ cluster.innerKMeansSetting(kms);
+ }
+
+ if(cluster.isCounterKMSFull()) {
+ // Merged all KMeans
+ cluster.resetCounterKMS();
+ cluster.resetChangeNClusters();
+ cluster.resetChangeNLoops();
+
+ // Check if inner loop is finished
+ if(isILoopFinish) {
+ // Inner loop finished, check if start another outer loop
+ if(cluster.checkNClusters()) {
+ // Start another outer loop, goto t3
+ taskexit(cluster{!resetKMeans, setKMeans}, kms{!reset,init});
+ } else {
+ // Check if start another out most loop
+ if(cluster.checkNLoops()) {
+ // Start another out most loop, goto t2
+ taskexit(cluster{!resetKMeans, startLoop}, kms{!reset,init});
+ } else {
+ // Terminate
+ taskexit(cluster{!resetKMeans, finish}, kms{!reset});
+ }
+ }
+ } else {
+ // Go on for annother inner loop, goto t4
+ cluster.resetDelta();
+ taskexit(cluster{!resetKMeans, toendLoop},kms{!reset, run});
+ }
+ } else {
+ // Check if inner loop is finished
+ if(isILoopFinish) {
+ // Inner loop finished, check if start another outer loop
+ if(cluster.checkNClusters()) {
+ // Start another outer loop, goto t3
+ taskexit(kms{!reset,init});
+ } else {
+ // Check if start another out most loop
+ if(cluster.checkNLoops()) {
+ // Start another out most loop, goto t2
+ taskexit(kms{!reset,init});
+ } else {
+ // Terminate
+ taskexit(kms{!reset});
+ }
+ }
+ } else {
+ // Go on for annother inner loop, goto t4
+ taskexit(kms{!reset, run});
+ }
+ }
+}
--- /dev/null
+IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+By downloading, copying, installing or using the software you agree
+to this license. If you do not agree to this license, do not download,
+install, copy or use the software.
+
+
+Copyright (c) 2006 Northwestern University
+All rights reserved.
+
+Redistribution of the software in source and binary forms,
+with or without modification, is permitted provided that the
+following conditions are met:
+
+1 Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2 Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3 Neither the name of Northwestern University nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
--- /dev/null
+public class MyRandom {
+ int[] mt;
+ int mti;
+ int RANDOM_DEFAULT_SEED;
+ /* period parameter */
+ int N;
+ int M;
+ int MATRIX_A;
+ int UPPER_MASK;
+ int LOWER_MASK;
+ int[] mag01;
+
+ public MyRandom() {
+ RANDOM_DEFAULT_SEED = 0;
+ N = 624;
+ M = 397;
+ mt = new int[N];
+ mti = N;
+ MATRIX_A = 0x9908b0df; /* constant vector a */
+ UPPER_MASK = 0x80000000; /* most significant w-r bits */
+ LOWER_MASK = 0x7fffffff; /* least significant r bits */
+ mag01 = new int[2];
+ mag01[0] = 0x0;
+ mag01[1] = MATRIX_A;
+
+ }
+
+ public void random_alloc() {
+ init_genrand(this.RANDOM_DEFAULT_SEED);
+ }
+
+ /* initializes mt[N] with a seed */
+ public void init_genrand(int s) {
+ mt[0]= s & 0xFFFFFFFF;
+ for (int mti=1; mti<N; mti++) {
+ mt[mti] = (1812433253 * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+ mt[mti] &= 0xFFFFFFFF;
+ /* for >32 bit machines */
+ }
+ this.mti=mti;
+ }
+
+ public void random_seed(int seed) {
+ init_genrand(seed);
+ }
+
+ public int random_generate() {
+ return genrand_int32();
+ }
+
+ public int posrandom_generate() {
+ int r=genrand_int32();
+ if (r>0)
+ return r;
+ else
+ return -r;
+ }
+
+ public int genrand_int32() {
+ int y;
+ int mti = this.mti;
+
+ /* mag01[x] = x * MATRIX_A for x=0,1 */
+
+ if (mti >= 624) { /* generate N words at one time */
+ int kk;
+ int[] mt = this.mt;
+
+ if (mti == 624+1) /* if init_genrand() has not been called, */
+ init_genrand(5489); /* a default initial seed is used */
+
+ for (kk=0;kk<(624-397);kk++) {
+ y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff);
+ mt[kk] = mt[kk+397] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df);
+ }
+ for (;kk<(624-1);kk++) {
+ y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff);
+ mt[kk] = mt[kk+(397-624)] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df);
+ }
+ y = (mt[624-1]&0x80000000)|(mt[0]&0x7fffffff);
+ mt[624-1] = mt[397-1] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df);
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+
+ /* Tempering */
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680;
+ y ^= (y << 15) & 0xefc60000;
+ y ^= (y >> 18);
+
+ this.mti = mti;
+
+ return y;
+ }
+}
--- /dev/null
+Introduction
+------------
+
+This benchmark was taken from NU-MineBench 2.0. Their technical report [3]
+provides a description of the algorithm:
+
+ K-means is a partition-based method [4] and is arguably the most
+ commonly used clustering technique. K-means represents a cluster by the
+ mean value of all objects contained in it. Given the user-provided
+ parameter k, the initial k cluster centers are randomly selected from
+ the database. Then, K-means assigns each object to its nearest cluster
+ center based on the similarity function. For example, for spatial
+ clustering, usually the Euclid distance is used to measure the closeness
+ of two objects. Once the assignments are completed, new centers are
+ found by finding the mean of all the objects in each cluster. This
+ process is repeated until two consecutive iterations generate the same
+ cluster assignment. The clusters produced by the K-means algorithm are
+ sometimes called "hard" clusters, since any data object either is or is
+ not a member of a particular cluster.
+
+
+Compiling and Running
+---------------------
+
+To build the application, simply run:
+
+ make
+
+By default, this produces an executable named "KMeans.bin", which can then be
+run in the following manner:
+
+ ./KMeans.bin -m <max_clusters> \
+ -n <min_clusters> \
+ -t <threshold> \
+ -i <input_file_name> \
+ -nthreads <number of threads>
+
+To produce the data in [1], the following values were used:
+
+ low contention: -m 40 -n 40 -t 0.05 -i inputs/random-n2048-d16-c16.txt
+ high contention: -m 15 -n 15 -t 0.05 -i inputs/random-n2048-d16-c16.txt
+
+For runs without a simulator, a larger input file, (more info below) can be used
+instead:
+
+ low contention: -m 40 -n 40 -t 0.00001 -i inputs/random-n65536-d32-c16.txt
+ high contention: -m 15 -n 15 -t 0.00001 -i inputs/random-n65536-d32-c16.txt
+
+
+Input Files
+-----------
+
+A section of the real image database distributed by Corel Corporation is used
+for K-means. This database consists of 17695 scenery pictures. Each picture is
+represented by two features: color and edge. The color feature is a vector of 9
+floating points while the edge feature is a vector of size 18. Both K-means
+implementations use Euclid distance as the similarity function and execute it
+for the two features separately. Since the clustering quality of K-means
+methods highly depends on the input parameter k, both K-means were executed
+with ten different k values ranging from 4 to 13.
+
+Smaller versions of "color", "edge", and "texture" were made by taking the
+first 100 lines of each file. The smaller inputs are called "color100",
+"edge100", and "texture100".
+
+The randomX_Y files were created by generating random data sets with X points
+of Y dimensions each. Values were selected from a uniform distribution.
+
+More input sets can be generated by using "inputs/generate.py". The script picks
+a given number of uniformly distributed random centers and then selects random
+points around those centers using a gaussian distribution. There are two
+included input files from this script: small, random-r2048-d16-c16.txt; and
+large, random-r65536-d32-c16.txt. In the filename, "n" refers to the number of
+points, "d" the number of dimensions, and "c" the number of centers.
+
+
+References
+----------
+
+[1] C. Cao Minh, J. Chung, C. Kozyrakis, and K. Olukotun. STAMP: Stanford
+ Transactional Applications for Multi-processing. In IISWC '08: Proceedings
+ of The IEEE International Symposium on Workload Characterization,
+ September 2008.
+
+[2] J. Pisharath. NU-MineBench 2.0. Technical Report CUCIS-2005-08-01, 2005.
+
+[3] J.C. Bezdek. Pattern Recognition with Fuzzy Objective Function Algorithms.
+ Kluwer Academic Publishers, 1981
+