--- /dev/null
+/* =============================================================================
+ *
+ * cluster.java
+ *
+ * =============================================================================
+ *
+ * Description:
+ *
+ * Takes as input a file, containing 1 data point per per line, and performs a
+ * fuzzy c-means clustering on the data. Fuzzy clustering is performed using
+ * min to max clusters and the clustering that gets the best score according to
+ * a compactness and separation criterion are returned.
+ *
+ *
+ * Author:
+ *
+ * Brendan McCane
+ * James Cook University of North Queensland. Australia.
+ * email: mccane@cs.jcu.edu.au
+ *
+ *
+ * Edited by:
+ *
+ * Jay Pisharath, Wei-keng Liao
+ * Northwestern University
+ *
+ * Chi Cao Minh
+ * Stanford University
+ *
+ * Ported to Java:
+ * Alokika Dash
+ * University of California, Irvine
+ *
+ * =============================================================================
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+*
+* =============================================================================
+*/
+
+public class Cluster {
+
+ public Cluster() {
+
+ }
+
+ /* =============================================================================
+ * extractMoments
+ * =============================================================================
+ */
+ public static float[]
+ extractMoments (float []data, int num_elts, int num_moments)
+ {
+ float[] moments = new float[num_moments];
+
+ for (int i = 0; i < num_elts; i++) {
+ moments[0] += data[i];
+ }
+
+ moments[0] = moments[0] / num_elts;
+ for (int j = 1; j < num_moments; j++) {
+ moments[j] = 0;
+ for (int i = 0; i < num_elts; i++) {
+ moments[j] = (float)(moments[j]+ Math.pow((data[i]-moments[0]), j+1));
+ }
+ moments[j] = moments[j] / num_elts;
+ }
+ return moments;
+ }
+
+
+ /* =============================================================================
+ * zscoreTransform
+ * =============================================================================
+ */
+ public static void
+ zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */
+ int numObjects,
+ int numAttributes)
+ {
+ float[] moments;
+ float[] single_variable = new float[numObjects];
+ for (int i = 0; i < numAttributes; i++) {
+ for (int j = 0; j < numObjects; j++) {
+ single_variable[j] = data[j][i];
+ }
+ moments = extractMoments(single_variable, numObjects, 2);
+ moments[1] = (float) Math.sqrt((double)moments[1]);
+ for (int j = 0; j < numObjects; j++) {
+ data[j][i] = (data[j][i]-moments[0])/moments[1];
+ }
+ }
+ }
+
+
+ /* =============================================================================
+ * cluster_exec
+ * =============================================================================
+ */
+ public static void
+ cluster_exec (
+ int nthreads, /* in: number of threads*/
+ int numObjects, /* number of input objects */
+ int numAttributes, /* size of attribute of each object */
+ float[][] attributes, /* [numObjects][numAttributes] */
+ KMeans kms, /* KMeans class hold the inputs and outputs */
+ GlobalArgs args /* Global thread arguments */
+ )
+ {
+ int itime;
+ int nclusters;
+
+ float[][] tmp_cluster_centres;
+ int[] membership = new int[numObjects];
+
+ Random randomPtr = new Random();
+ randomPtr.random_alloc();
+
+ if (kms.use_zscore_transform == 1) {
+ zscoreTransform(attributes, numObjects, numAttributes);
+ }
+
+ itime = 0;
+
+ /*
+ * From min_nclusters to max_nclusters, find best_nclusters
+ */
+ for (nclusters = kms.min_nclusters; nclusters <= kms.max_nclusters; nclusters++) {
+
+ randomPtr.random_seed(7);
+
+ Normal norm = new Normal();
+
+ tmp_cluster_centres = norm.normal_exec(nthreads,
+ attributes,
+ numAttributes,
+ numObjects,
+ nclusters,
+ kms.threshold,
+ membership,
+ randomPtr,
+ args);
+
+ kms.cluster_centres = tmp_cluster_centres;
+ kms.best_nclusters = nclusters;
+
+
+ itime++;
+ } /* nclusters */
+ }
+}
+
+/* =============================================================================
+ *
+ * End of cluster.java
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* =============================================================================
+ *
+ * common.java
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+public class Common {
+
+ public Common() {
+ }
+
+
+ /* =============================================================================
+ * common_euclidDist2
+ * -- multi-dimensional spatial Euclid distance square
+ * =============================================================================
+ */
+ public static float
+ common_euclidDist2 (float[] pt1, float[] pt2, int numdims)
+ {
+ int i;
+ float ans = 0.0f;
+
+ for (i = 0; i < numdims; i++) {
+ ans += (pt1[i] - pt2[i]) * (pt1[i] - pt2[i]);
+ }
+
+ return ans;
+ }
+
+
+ /* =============================================================================
+ * common_findNearestPoint
+ * =============================================================================
+ */
+ public static int
+ common_findNearestPoint (float[] pt, /* [nfeatures] */
+ int nfeatures,
+ float[][] pts, /* [npts][nfeatures] */
+ int npts)
+ {
+ int index = -1;
+ int i;
+ //double max_dist = FLT_MAX;
+ float max_dist = (float)3.40282347e+38f;
+ float limit = (float) 0.99999;
+
+ /* Find the cluster center id with min distance to pt */
+ for (i = 0; i < npts; i++) {
+ float dist = common_euclidDist2(pt, pts[i], nfeatures); /* no need square root */
+ if ((dist / max_dist) < limit) {
+ max_dist = dist;
+ index = i;
+ if (max_dist == 0) {
+ break;
+ }
+ }
+ }
+
+ return index;
+ }
+}
+
+
+/* =============================================================================
+ *
+ * End of common.java
+ *
+ * =============================================================================
+ */
--- /dev/null
+/* ==============================================================================
+ *
+ * GlobalArgs.java
+ * -- Class that holds all the global parameters used by each thread
+ * during parallel execution
+ *
+ * =============================================================================
+ * Author:
+ *
+ * Alokika Dash
+ * University of California, Irvine
+ * email adash@uci.edu
+ *
+ * =============================================================================
+ */
+
+public class GlobalArgs {
+
+ public GlobalArgs() {
+
+ }
+
+ /**
+ * Number of threads
+ **/
+ public int nthreads;
+
+ /**
+ * List of attributes
+ **/
+ public float[][] feature;
+
+ /**
+ * Number of attributes per Object
+ **/
+ public int nfeatures;
+
+ /**
+ * Number of Objects
+ **/
+ public int npoints;
+
+
+ /**
+ * Iteration id between min_nclusters to max_nclusters
+ **/
+ public int nclusters;
+
+
+ /**
+ * Array that holds change index of cluster center per thread
+ **/
+ public int[] membership;
+
+ /**
+ *
+ **/
+ public float[][] clusters;
+
+
+ /**
+ * Number of points in each cluster [nclusters]
+ **/
+ public int[] new_centers_len;
+
+ /**
+ * New centers of the clusters [nclusters][nfeatures]
+ **/
+ public float[][] new_centers;
+
+ /**
+ *
+ **/
+ public int global_i;
+
+ public float global_delta;
+
+ long global_time;
+
+}
--- /dev/null
+ /* =============================================================================
+ *
+ * kmeans.java
+ *
+ * =============================================================================
+ *
+ * Description:
+ *
+ * Takes as input a file:
+ * ascii file: containing 1 data point per line
+ * binary file: first int is the number of objects
+ * 2nd int is the no. of features of each object
+ *
+ * This example performs a fuzzy c-means clustering on the data. Fuzzy clustering
+ * is performed using min to max clusters and the clustering that gets the best
+ * score according to a compactness and separation criterion are returned.
+ *
+ *
+ * Author:
+ *
+ * Wei-keng Liao
+ * ECE Department Northwestern University
+ * email: wkliao@ece.northwestern.edu
+ *
+ *
+ * Edited by:
+ *
+ * Jay Pisharath
+ * Northwestern University
+ *
+ * Chi Cao Minh
+ * Stanford University
+ *
+ * Port to Java version
+ * Alokika Dash
+ * University of California, Irvine
+ *
+ * =============================================================================
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+public class KMeans /*extends Thread*/ {
+ /**
+ * User input for max clusters
+ **/
+ int max_nclusters;
+
+ /**
+ * User input for min clusters
+ **/
+ int min_nclusters;
+
+ /**
+ * Check for Binary file
+ **/
+ int isBinaryFile;
+
+ /**
+ * Using zscore transformation for cluster center
+ * deviating from distribution's mean
+ **/
+ int use_zscore_transform;
+
+ /**
+ * Input file name used for clustering
+ **/
+ String filename;
+
+ /**
+ * Total number of threads
+ **/
+ int nthreads;
+
+ /**
+ * threshold until which kmeans cluster continues
+ **/
+ float threshold;
+
+ /**
+ * thread id
+ **/
+ int threadid;
+
+ /**
+ * Global arguments for threads
+ **/
+ GlobalArgs g_args;
+
+ /**
+ * Output: Number of best clusters
+ **/
+ int best_nclusters;
+
+ /**
+ * Output: Cluster centers
+ **/
+ float[][] cluster_centres;
+
+ public KMeans() {
+ max_nclusters = 13;
+ min_nclusters = 4;
+ isBinaryFile = 0;
+ use_zscore_transform = 1;
+ threshold = (float) 0.001;
+ best_nclusters=0;
+ }
+
+ public KMeans(int threadid, GlobalArgs g_args) {
+ this.threadid = threadid;
+ this.g_args = g_args;
+ }
+
+ public void run() {
+ while(true) {
+// Barrier.enterBarrier();
+ Normal.work(threadid, g_args);
+// Barrier.enterBarrier();
+ }
+ }
+
+ /* =============================================================================
+ * main
+ * =============================================================================
+ */
+ public static void main(String[] args) {
+ int nthreads;
+ int MAX_LINE_LENGTH = 1000000; /* max input is 400000 one digit input + spaces */
+
+ /**
+ * Read options fron the command prompt
+ **/
+ KMeans kms = new KMeans();
+ KMeans.parseCmdLine(args, kms);
+ nthreads = kms.nthreads;
+
+ /* Initiate Barriers */
+// Barrier.setBarrier(nthreads);
+
+ if (kms.max_nclusters < kms.min_nclusters) {
+ System.out.println("Error: max_clusters must be >= min_clusters\n");
+ System.exit(0);
+ }
+
+ float[][] buf;
+ float[][] attributes;
+ int numAttributes = 0;
+ int numObjects = 0;
+
+ /*
+ * From the input file, get the numAttributes (columns in txt file) and numObjects (rows in txt file)
+ */
+ if (kms.isBinaryFile == 1) {
+ System.out.println("TODO: Unimplemented Binary file option\n");
+ System.exit(0);
+ }
+
+ FileInputStream inputFile = new FileInputStream(kms.filename);
+ byte b[] = new byte[MAX_LINE_LENGTH];
+ int n;
+ while ((n = inputFile.read(b)) != 0) {
+ for (int i = 0; i < n; i++) {
+ if (b[i] == '\n')
+ numObjects++;
+ }
+ }
+ inputFile.close();
+ inputFile = new FileInputStream(kms.filename);
+ String line = null;
+ if((line = inputFile.readLine()) != null) {
+ int index = 0;
+ boolean prevWhiteSpace = true;
+ while(index < line.length()) {
+ char c = line.charAt(index++);
+ boolean currWhiteSpace = Character.isWhitespace(c);
+ if(prevWhiteSpace && !currWhiteSpace){
+ numAttributes++;
+ }
+ prevWhiteSpace = currWhiteSpace;
+ }
+ }
+ inputFile.close();
+
+ /* Ignore the first attribute: numAttributes = 1; */
+ numAttributes = numAttributes - 1;
+ System.out.println("numObjects= " + numObjects + " numAttributes= " + numAttributes);
+
+ /* Allocate new shared objects and read attributes of all objects */
+ buf = new float[numObjects][numAttributes];
+ attributes = new float[numObjects][numAttributes];
+ KMeans.readFromFile(inputFile, kms.filename, buf, MAX_LINE_LENGTH);
+ System.out.println("Finished Reading from file ......");
+
+ /*
+ * The core of the clustering
+ */
+
+ int nloops = 1;
+ int len = kms.max_nclusters - kms.min_nclusters + 1;
+
+ KMeans[] km = new KMeans[nthreads];
+ GlobalArgs g_args = disjoint GARGS new GlobalArgs();
+ g_args.nthreads = nthreads;
+
+ /* Create and Start Threads */
+ for(int i = 1; i<nthreads; i++) {
+ km[i] = new KMeans(i, g_args);
+ }
+
+ for(int i = 1; i<nthreads; i++) {
+ km[i].run();
+ }
+
+ System.out.println("Finished Starting threads......");
+
+ for (int i = 0; i < nloops; i++) {
+ /*
+ * Since zscore transform may perform in cluster() which modifies the
+ * contents of attributes[][], we need to re-store the originals
+ */
+ for(int x = 0; x < numObjects; x++) {
+ for(int y = 0; y < numAttributes; y++) {
+ attributes[x][y] = buf[x][y];
+ }
+ }
+
+ Cluster.cluster_exec(nthreads,
+ numObjects,
+ numAttributes,
+ attributes, // [numObjects][numAttributes]
+ kms, //main class that holds users inputs from command prompt and output arrays that need to be filled
+ g_args); // Global arguments common to all threads
+ }
+
+ System.out.println("TIME="+g_args.global_time);
+
+ System.out.println("Printing output......");
+ System.out.println("Best_nclusters= " + kms.best_nclusters);
+
+ /* Output: the coordinates of the cluster centres */
+ {
+ for (int i = 0; i < kms.best_nclusters; i++) {
+ System.out.print(i + " ");
+ for (int j = 0; j < numAttributes; j++) {
+ System.out.print(kms.cluster_centres[i][j] + " ");
+ }
+ System.out.println("\n");
+ }
+ }
+
+ System.out.println("Finished......");
+ System.exit(0);
+ }
+
+ public static void parseCmdLine(String args[], KMeans km) {
+ int i = 0;
+ String arg;
+ while (i < args.length && args[i].startsWith("-")) {
+ arg = args[i++];
+ //check options
+ if(arg.equals("-m")) {
+ if(i < args.length) {
+ km.max_nclusters = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-n")) {
+ if(i < args.length) {
+ km.min_nclusters = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-t")) {
+ if(i < args.length) {
+ km.threshold = (float) Double.parseDouble(args[i++]);
+ }
+ } else if(arg.equals("-i")) {
+ if(i < args.length) {
+ km.filename = args[i++];
+ }
+ } else if(arg.equals("-b")) {
+ if(i < args.length) {
+ km.isBinaryFile = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-z")) {
+ km.use_zscore_transform=0;
+ } else if(arg.equals("-nthreads")) {
+ if(i < args.length) {
+ km.nthreads = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-h")) {
+ km.usage();
+ }
+ }
+ if(km.nthreads == 0 || km.filename == null) {
+ km.usage();
+ }
+ }
+
+ /**
+ * The usage routine which describes the program options.
+ **/
+ public void usage() {
+ System.out.println("usage: ./kmeans -m <max_clusters> -n <min_clusters> -t <threshold> -i <filename> -nthreads <threads>\n");
+ System.out.println( " -i filename: file containing data to be clustered\n");
+ System.out.println( " -b input file is in binary format\n");
+ System.out.println( " -m max_clusters: maximum number of clusters allowed\n");
+ System.out.println( " -n min_clusters: minimum number of clusters allowed\n");
+ System.out.println( " -z : don't zscore transform data\n");
+ System.out.println( " -t threshold : threshold value\n");
+ System.out.println( " -nthreads : number of threads\n");
+ }
+
+ /**
+ * readFromFile()
+ * Read attributes from the input file into an array
+ **/
+ public static void readFromFile(FileInputStream inputFile, String filename, float[][] buf, int MAX_LINE_LENGTH) {
+ inputFile = new FileInputStream(filename);
+ int j;
+ int i = 0;
+
+ byte b[] = new byte[MAX_LINE_LENGTH];
+ int n;
+ byte oldbytes[]=null;
+
+
+ j = -1;
+ while ((n = inputFile.read(b)) != 0) {
+ int x=0;
+
+ if (oldbytes!=null) {
+ //find space
+ boolean cr=false;
+ for (;x < n; x++) {
+ if (b[x] == ' ')
+ break;
+ if (b[x] == '\n') {
+ cr=true;
+ break;
+ }
+ }
+ byte newbytes[]=new byte[x+oldbytes.length];
+ boolean isnumber=false;
+ for(int ii=0;ii<oldbytes.length;ii++) {
+ if (oldbytes[ii]>='0'&&oldbytes[ii]<='9')
+ isnumber=true;
+ newbytes[ii]=oldbytes[ii];
+ }
+ for(int ii=0;ii<x;ii++) {
+ if (b[ii]>='0'&&b[ii]<='9')
+ isnumber=true;
+ newbytes[ii+oldbytes.length]=b[ii];
+ }
+ if (x!=n)
+ x++; //skip past space or cr
+ if (isnumber) {
+ if (j>=0) {
+ buf[i][j]=(float)Double.parseDouble(new String(newbytes, 0, newbytes.length));
+ }
+ j++;
+ }
+ if (cr) {
+ j=-1;
+ i++;
+ }
+ oldbytes=null;
+ }
+
+ while (x < n) {
+ int y=x;
+ boolean cr=false;
+ boolean isnumber=false;
+ for(y=x;y<n;y++) {
+ if ((b[y]>='0')&&(b[y]<='9'))
+ isnumber=true;
+ if (b[y]==' ')
+ break;
+ if (b[y]=='\n') {
+ cr=true;
+ break;
+ }
+ }
+ if (y==n) {
+ //need to continue for another read
+ oldbytes=new byte[y-x];
+ for(int ii=0;ii<(y-x);ii++)
+ oldbytes[ii]=b[ii+x];
+ break;
+ }
+
+ //otherwise x is beginning of character string, y is end
+ if (isnumber) {
+ if (j>=0) {
+ buf[i][j]=(float)Double.parseDouble(new String(b,x,y-x));
+ }
+ j++;
+ }
+ if (cr) {
+ i++;//skip to next line
+ j = -1;//don't store line number
+ x=y;//skip to end of number
+ x++;//skip past return
+ } else {
+ x=y;//skip to end of number
+ x++;//skip past space
+ }
+ }
+ }
+ inputFile.close();
+ }
+}
+
+/* =============================================================================
+ *
+ * End of kmeans.java
+ *
+ * =============================================================================
+ */
--- /dev/null
+import Barrier;
+import Common;
+import GlobalArgs;
+import Normal;
+import Random;
+import System;
+import intwrapper;
+
+/* =============================================================================
+ *
+ * normal.java
+ * -- Implementation of normal k-means clustering algorithm
+ *
+ * =============================================================================
+ *
+ * Author:
+ *
+ * Wei-keng Liao
+ * ECE Department, Northwestern University
+ * email: wkliao@ece.northwestern.edu
+ *
+ *
+ * Edited by:
+ *
+ * Jay Pisharath
+ * Northwestern University.
+ *
+ * Chi Cao Minh
+ * Stanford University
+ *
+ * Alokika Dash
+ * University of California, Irvine
+ * Ported to Java
+ *
+ * =============================================================================
+ *
+ * For the license of bayes/sort.h and bayes/sort.c, please see the header
+ * of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of kmeans, please see kmeans/LICENSE.kmeans
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of ssca2, please see ssca2/COPYRIGHT
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
+ * header of the files.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * For the license of lib/rbtree.h and lib/rbtree.c, please see
+ * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Unless otherwise noted, the following license applies to STAMP files:
+ *
+ * Copyright (c) 2007, Stanford University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Stanford University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * =============================================================================
+ */
+
+public class Normal {
+ int CHUNK;
+
+ public Normal() {
+ CHUNK = 3;
+ }
+
+ /*
+ * ==========================================================================
+ * === work
+ * ==================================================================
+ * ===========
+ */
+ public static void work(int myId, GlobalArgs args) {
+
+ float[][] feature = args.feature;
+ int nfeatures = args.nfeatures;
+ int npoints = args.npoints;
+ int nclusters = args.nclusters;
+ int[] membership = args.membership;
+ float[][] clusters = args.clusters;
+ int[] new_centers_len = args.new_centers_len;
+ float[][] new_centers = args.new_centers;
+ float delta = 0.0f;
+ int start, stop;
+
+ int CHUNK=500;
+ start = myId * CHUNK;
+
+ while (start < npoints) {
+ stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints);
+
+ sese parallel{
+ int indexArrayLen=stop-start;
+ int indexArray[]=disjoint IDXARRAY new int[indexArrayLen];
+ int pidx=0;
+ for (int i = start; i < stop; i++) {
+ int index = Common.common_findNearestPoint(feature[i],
+ nfeatures,
+ clusters,
+ nclusters);
+ indexArray[pidx]=index;
+ pidx++;
+ }
+ System.out.println("p");
+ }
+
+ sese serial{
+
+ int sidx=0;
+ for (int i = start; i < stop; i++) {
+
+ int newIndex=indexArray[sidx];
+ if (membership[i] != newIndex) {
+ delta += 1.0f;
+ }
+
+ membership[i] = newIndex;
+
+ new_centers_len[newIndex] = new_centers_len[newIndex] + 1;
+ for (int j = 0; j < nfeatures; j++) {
+ new_centers[newIndex][j] = new_centers[newIndex][j] + feature[i][j];
+ }
+
+ sidx++;
+ }
+
+ }
+
+
+ if (start + CHUNK < npoints) {
+// atomic {
+ start = args.global_i;
+ args.global_i = start + CHUNK;
+// }
+ } else {
+ break;
+ }
+ }
+
+
+/*
+ while (start < npoints) {
+ stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints);
+
+ for (int i = start; i < stop; i++) {
+
+ sese parallel{
+ int index = Common.common_findNearestPoint(feature[i],
+ nfeatures,
+ clusters,
+ nclusters);
+ }
+
+ sese serial{
+
+ // If membership changes, increase delta by 1.
+ // membership[i] cannot be changed by other threads
+ if (membership[i] != index) {
+ delta += 1.0f;
+ }
+
+ // Assign the membership to object i
+ // membership[i] can't be changed by other thread
+ membership[i] = index;
+
+ // Update new cluster centers : sum of objects located within
+ new_centers_len[index] = new_centers_len[index] + 1;
+ for (int j = 0; j < nfeatures; j++) {
+ new_centers[index][j] = new_centers[index][j] + feature[i][j];
+ }
+ }
+
+ }
+
+ // Update task queue
+ if (start + CHUNK < npoints) {
+ start = args.global_i;
+ args.global_i = start + CHUNK;
+ } else {
+ break;
+ }
+ }//end of while
+*/
+ args.global_delta = args.global_delta + delta;
+ }
+
+ /*
+ * ==========================================================================
+ * === normal_exec
+ * ==========================================================
+ * ===================
+ */
+ public float[][] normal_exec(int nthreads, float[][] feature, /*
+ * in:
+ * [npoints][
+ * nfeatures]
+ */
+ int nfeatures, int npoints, int nclusters, float threshold,
+ int[] membership, Random randomPtr, /* out: [npoints] */
+ GlobalArgs args) {
+ float delta;
+ float[][] clusters; /* out: [nclusters][nfeatures] */
+
+ /* Allocate space for returning variable clusters[] */
+ clusters = new float[nclusters][nfeatures];
+
+ /* Randomly pick cluster centers */
+ for (int i = 0; i < nclusters; i++) {
+ int n = (int) (randomPtr.random_generate() % npoints);
+ for (int j = 0; j < nfeatures; j++) {
+ clusters[i][j] = feature[n][j];
+ }
+ }
+
+ for (int i = 0; i < npoints; i++) {
+ membership[i] = -1;
+ }
+
+ /*
+ * Need to initialize new_centers_len and new_centers[0] to all 0.
+ * Allocate clusters on different cache lines to reduce false sharing.
+ */
+ int[] new_centers_len = new int[nclusters];
+
+ float[][] new_centers = new float[nclusters][nfeatures];
+
+ int loop = 0;
+
+ long start = System.currentTimeMillis();
+ do {
+ delta = 0.0f;
+
+ args.feature = feature;
+ args.nfeatures = nfeatures;
+ args.npoints = npoints;
+ args.nclusters = nclusters;
+ args.membership = membership;
+ args.clusters = clusters;
+ args.new_centers_len = new_centers_len;
+ args.new_centers = new_centers;
+
+ args.global_i = nthreads * CHUNK;
+ args.global_delta = delta;
+
+ // Work in parallel with other threads
+ thread_work(args);
+ System.out.println("RRRR");
+ delta = args.global_delta;
+
+ /* Replace old cluster centers with new_centers */
+ for (int i = 0; i < nclusters; i++) {
+ for (int j = 0; j < nfeatures; j++) {
+ if (new_centers_len[i] > 0) {
+ clusters[i][j] = new_centers[i][j] / new_centers_len[i];
+ }
+ new_centers[i][j] = (float) 0.0; /* set back to 0 */
+ }
+ new_centers_len[i] = 0; /* set back to 0 */
+ }
+
+ delta /= npoints;
+
+ } while ((delta > threshold) && (loop++ < 500));
+ long stop = System.currentTimeMillis();
+ args.global_time += (stop - start);
+
+ return clusters;
+ }
+
+ /**
+ * Work done by primary thread in parallel with other threads
+ **/
+ void thread_work(GlobalArgs args) {
+ Normal.work(0, args); // threadId = 0 because primary thread
+ }
+}
+
+/*
+ * =============================================================================
+ *
+ * End of normal.java
+ *
+ * =============================================================================
+ */
+
+
--- /dev/null
+public class Random {
+ int[] mt;
+ int mti;
+ int RANDOM_DEFAULT_SEED;
+ /* period parameter */
+ int N;
+ int M;
+ int MATRIX_A;
+ int UPPER_MASK;
+ int LOWER_MASK;
+ int[] mag01;
+
+ public Random() {
+ RANDOM_DEFAULT_SEED = 0;
+ N = 624;
+ M = 397;
+ mt = new int[N];
+ mti = N;
+ MATRIX_A = 0x9908b0df; /* constant vector a */
+ UPPER_MASK = 0x80000000; /* most significant w-r bits */
+ LOWER_MASK = 0x7fffffff; /* least significant r bits */
+ mag01 = new int[2];
+ mag01[0] = 0x0;
+ mag01[1] = MATRIX_A;
+
+ }
+
+ public void random_alloc() {
+ init_genrand(this.RANDOM_DEFAULT_SEED);
+ }
+
+ /* initializes mt[N] with a seed */
+ public void init_genrand(int s) {
+ mt[0]= s & 0xFFFFFFFF;
+ for (int mti=1; mti<N; mti++) {
+ mt[mti] = (1812433253 * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+ mt[mti] &= 0xFFFFFFFF;
+ /* for >32 bit machines */
+ }
+ this.mti=mti;
+ }
+
+ public void random_seed(int seed) {
+ init_genrand(seed);
+ }
+
+ public int random_generate() {
+ return genrand_int32();
+ }
+
+ public int posrandom_generate() {
+ int r=genrand_int32();
+ if (r>0)
+ return r;
+ else
+ return -r;
+ }
+
+ public int genrand_int32() {
+ int y;
+ int mti = this.mti;
+
+ /* mag01[x] = x * MATRIX_A for x=0,1 */
+
+ if (mti >= 624) { /* generate N words at one time */
+ int kk;
+ int[] mt = this.mt;
+
+ if (mti == 624+1) /* if init_genrand() has not been called, */
+ init_genrand(5489); /* a default initial seed is used */
+
+ for (kk=0;kk<(624-397);kk++) {
+ y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff);
+ mt[kk] = mt[kk+397] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df);
+ }
+ for (;kk<(624-1);kk++) {
+ y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff);
+ mt[kk] = mt[kk+(397-624)] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df);
+ }
+ y = (mt[624-1]&0x80000000)|(mt[0]&0x7fffffff);
+ mt[624-1] = mt[397-1] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df);
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+
+ /* Tempering */
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680;
+ y ^= (y << 15) & 0xefc60000;
+ y ^= (y >> 18);
+
+ this.mti = mti;
+
+ return y;
+ }
+}
--- /dev/null
+public class Random {
+ long[] mt;
+ int mti;
+ int RANDOM_DEFAULT_SEED;
+ /* period parameter */
+
+
+ public Random() {
+ RANDOM_DEFAULT_SEED = 0;
+ mt = new long[624];
+ }
+
+ public void random_alloc() {
+ init_genrand(this.RANDOM_DEFAULT_SEED);
+ }
+
+ /* initializes mt[N] with a seed */
+ public void init_genrand(int s) {
+ mt[0]= ((long)s) & 0xFFFFFFFFL;
+ for (int mti=1; mti<624; mti++) {
+ mt[mti] = (1812433253L * (mt[mti-1] ^ (mt[mti-1] >> 30)) + ((long)mti));
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+ mt[mti] &= 0xFFFFFFFFL;
+ /* for >32 bit machines */
+ }
+ this.mti=624;
+ }
+
+ public void random_seed(int seed) {
+ init_genrand(seed);
+ }
+
+ public long random_generate() {
+ long x= genrand_int32()&0xFFFFFFFFL;
+ return x;
+ }
+
+ public long posrandom_generate() {
+ long r=genrand_int32();
+ if (r>0)
+ return r;
+ else
+ return -r;
+ }
+
+ public long genrand_int32() {
+ long y;
+ int mti = this.mti;
+ long[] mt = this.mt;
+
+ if (mti >= 624) { /* generate N words at one time */
+ int kk;
+
+ if (mti == 624+1) { /* if init_genrand() has not been called, */
+ init_genrand(5489); /* a default initial seed is used */
+ mti=this.mti;
+ }
+ for (kk=0;kk<(624-397);kk++) {
+ y = (mt[kk]&0x80000000L)|(mt[kk+1]&0x7fffffffL);
+ mt[kk] = mt[kk+397] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0L:0x9908b0dfL);
+ }
+ for (;kk<(624-1);kk++) {
+ y = (mt[kk]&0x80000000L)|(mt[kk+1]&0x7fffffffL);
+ mt[kk] = mt[kk+(397-624)] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0L:0x9908b0dfL);
+ }
+ y = (mt[624-1]&0x80000000L)|(mt[0]&0x7fffffffL);
+ mt[624-1] = mt[397-1] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0L:0x9908b0dfL);
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+
+ /* Tempering */
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680L;
+ y ^= (y << 15) & 0xefc60000L;
+ y ^= (y >> 18);
+
+ this.mti = mti;
+
+ return y;
+ }
+}
--- /dev/null
+BUILDSCRIPT=../../../buildscript
+MAINCLASS=KMeans
+SRC=KMeans.java \
+ Random.java \
+ Cluster.java \
+ Normal.java \
+ Common.java \
+ GlobalArgs.java
+
+#DEBUGFLAGS= -disjoint-debug-callsite MDRunner t3 100
+#DEBUGFLAGS= -disjoint-debug-callsite calcGoodFeature calcGoodFeatureTask 100
+#DEBUGFLAGS= -disjoint-debug-callsite getRows calcGoodFeature 4
+#DEBUGFLAGS= -disjoint-debug-callsite setKMeans t3 500
+
+#SNAPFLAGS= -disjoint-debug-snap-method calcGoodFeatureTask 5 10 true
+#SNAPFLAGS= -disjoint-debug-snap-method calcGoodFeature 5 1 true
+
+#SNAPFLAGS= -disjoint-debug-snap-method t3 5 20 true
+
+BSFLAGS= -justanalyze -disjoint -disjoint-k 1 -disjoint-write-dots all -disjoint-alias-file aliases.txt normal -enable-assertions -mainclass ${MAINCLASS}
+
+all:
+ $(BUILDSCRIPT) $(BSFLAGS) $(DEBUGFLAGS) $(SNAPFLAGS) ${SRC}
+
+clean:
+ rm -f *.bin
+ rm -fr tmpbuilddirectory
+ rm -f *~
+ rm -f *.dot
+ rm -f *.png
+ rm -f *.aux
+ rm -f *.log
+ rm -f *.pdf
+ rm -f aliases.txt
+ rm -f tabResults.tex