From ad8e707a13cf1712aaed1d0cfc52a196090b5bf2 Mon Sep 17 00:00:00 2001 From: yeom Date: Thu, 25 Mar 2010 22:55:53 +0000 Subject: [PATCH] add annoated kmeans --- .../Tests/disjoint/kmeans_new/Cluster.java | 193 ++++++++ .../src/Tests/disjoint/kmeans_new/Common.java | 129 +++++ .../Tests/disjoint/kmeans_new/GlobalArgs.java | 80 +++ .../src/Tests/disjoint/kmeans_new/KMeans.java | 454 ++++++++++++++++++ .../src/Tests/disjoint/kmeans_new/Normal.java | 322 +++++++++++++ .../src/Tests/disjoint/kmeans_new/Random.java | 102 ++++ .../disjoint/kmeans_new/RandomExact.java | 87 ++++ Robust/src/Tests/disjoint/kmeans_new/makefile | 35 ++ 8 files changed, 1402 insertions(+) create mode 100644 Robust/src/Tests/disjoint/kmeans_new/Cluster.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/Common.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/GlobalArgs.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/KMeans.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/Normal.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/Random.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/RandomExact.java create mode 100644 Robust/src/Tests/disjoint/kmeans_new/makefile diff --git a/Robust/src/Tests/disjoint/kmeans_new/Cluster.java b/Robust/src/Tests/disjoint/kmeans_new/Cluster.java new file mode 100644 index 00000000..e35be987 --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/Cluster.java @@ -0,0 +1,193 @@ +/* ============================================================================= + * + * cluster.java + * + * ============================================================================= + * + * Description: + * + * Takes as input a file, containing 1 data point per per line, and performs a + * fuzzy c-means clustering on the data. Fuzzy clustering is performed using + * min to max clusters and the clustering that gets the best score according to + * a compactness and separation criterion are returned. + * + * + * Author: + * + * Brendan McCane + * James Cook University of North Queensland. Australia. + * email: mccane@cs.jcu.edu.au + * + * + * Edited by: + * + * Jay Pisharath, Wei-keng Liao + * Northwestern University + * + * Chi Cao Minh + * Stanford University + * + * Ported to Java: + * Alokika Dash + * University of California, Irvine + * + * ============================================================================= + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. +* +* ============================================================================= +*/ + +public class Cluster { + + public Cluster() { + + } + + /* ============================================================================= + * extractMoments + * ============================================================================= + */ + public static float[] + extractMoments (float []data, int num_elts, int num_moments) + { + float[] moments = new float[num_moments]; + + for (int i = 0; i < num_elts; i++) { + moments[0] += data[i]; + } + + moments[0] = moments[0] / num_elts; + for (int j = 1; j < num_moments; j++) { + moments[j] = 0; + for (int i = 0; i < num_elts; i++) { + moments[j] = (float)(moments[j]+ Math.pow((data[i]-moments[0]), j+1)); + } + moments[j] = moments[j] / num_elts; + } + return moments; + } + + + /* ============================================================================= + * zscoreTransform + * ============================================================================= + */ + public static void + zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */ + int numObjects, + int numAttributes) + { + float[] moments; + float[] single_variable = new float[numObjects]; + for (int i = 0; i < numAttributes; i++) { + for (int j = 0; j < numObjects; j++) { + single_variable[j] = data[j][i]; + } + moments = extractMoments(single_variable, numObjects, 2); + moments[1] = (float) Math.sqrt((double)moments[1]); + for (int j = 0; j < numObjects; j++) { + data[j][i] = (data[j][i]-moments[0])/moments[1]; + } + } + } + + + /* ============================================================================= + * cluster_exec + * ============================================================================= + */ + public static void + cluster_exec ( + int nthreads, /* in: number of threads*/ + int numObjects, /* number of input objects */ + int numAttributes, /* size of attribute of each object */ + float[][] attributes, /* [numObjects][numAttributes] */ + KMeans kms, /* KMeans class hold the inputs and outputs */ + GlobalArgs args /* Global thread arguments */ + ) + { + int itime; + int nclusters; + + float[][] tmp_cluster_centres; + int[] membership = new int[numObjects]; + + Random randomPtr = new Random(); + randomPtr.random_alloc(); + + if (kms.use_zscore_transform == 1) { + zscoreTransform(attributes, numObjects, numAttributes); + } + + itime = 0; + + /* + * From min_nclusters to max_nclusters, find best_nclusters + */ + for (nclusters = kms.min_nclusters; nclusters <= kms.max_nclusters; nclusters++) { + + randomPtr.random_seed(7); + + Normal norm = new Normal(); + + tmp_cluster_centres = norm.normal_exec(nthreads, + attributes, + numAttributes, + numObjects, + nclusters, + kms.threshold, + membership, + randomPtr, + args); + + kms.cluster_centres = tmp_cluster_centres; + kms.best_nclusters = nclusters; + + + itime++; + } /* nclusters */ + } +} + +/* ============================================================================= + * + * End of cluster.java + * + * ============================================================================= + */ diff --git a/Robust/src/Tests/disjoint/kmeans_new/Common.java b/Robust/src/Tests/disjoint/kmeans_new/Common.java new file mode 100644 index 00000000..6e777eba --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/Common.java @@ -0,0 +1,129 @@ +/* ============================================================================= + * + * common.java + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + +public class Common { + + public Common() { + } + + + /* ============================================================================= + * common_euclidDist2 + * -- multi-dimensional spatial Euclid distance square + * ============================================================================= + */ + public static float + common_euclidDist2 (float[] pt1, float[] pt2, int numdims) + { + int i; + float ans = 0.0f; + + for (i = 0; i < numdims; i++) { + ans += (pt1[i] - pt2[i]) * (pt1[i] - pt2[i]); + } + + return ans; + } + + + /* ============================================================================= + * common_findNearestPoint + * ============================================================================= + */ + public static int + common_findNearestPoint (float[] pt, /* [nfeatures] */ + int nfeatures, + float[][] pts, /* [npts][nfeatures] */ + int npts) + { + int index = -1; + int i; + //double max_dist = FLT_MAX; + float max_dist = (float)3.40282347e+38f; + float limit = (float) 0.99999; + + /* Find the cluster center id with min distance to pt */ + for (i = 0; i < npts; i++) { + float dist = common_euclidDist2(pt, pts[i], nfeatures); /* no need square root */ + if ((dist / max_dist) < limit) { + max_dist = dist; + index = i; + if (max_dist == 0) { + break; + } + } + } + + return index; + } +} + + +/* ============================================================================= + * + * End of common.java + * + * ============================================================================= + */ diff --git a/Robust/src/Tests/disjoint/kmeans_new/GlobalArgs.java b/Robust/src/Tests/disjoint/kmeans_new/GlobalArgs.java new file mode 100644 index 00000000..10673bdc --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/GlobalArgs.java @@ -0,0 +1,80 @@ +/* ============================================================================== + * + * GlobalArgs.java + * -- Class that holds all the global parameters used by each thread + * during parallel execution + * + * ============================================================================= + * Author: + * + * Alokika Dash + * University of California, Irvine + * email adash@uci.edu + * + * ============================================================================= + */ + +public class GlobalArgs { + + public GlobalArgs() { + + } + + /** + * Number of threads + **/ + public int nthreads; + + /** + * List of attributes + **/ + public float[][] feature; + + /** + * Number of attributes per Object + **/ + public int nfeatures; + + /** + * Number of Objects + **/ + public int npoints; + + + /** + * Iteration id between min_nclusters to max_nclusters + **/ + public int nclusters; + + + /** + * Array that holds change index of cluster center per thread + **/ + public int[] membership; + + /** + * + **/ + public float[][] clusters; + + + /** + * Number of points in each cluster [nclusters] + **/ + public int[] new_centers_len; + + /** + * New centers of the clusters [nclusters][nfeatures] + **/ + public float[][] new_centers; + + /** + * + **/ + public int global_i; + + public float global_delta; + + long global_time; + +} diff --git a/Robust/src/Tests/disjoint/kmeans_new/KMeans.java b/Robust/src/Tests/disjoint/kmeans_new/KMeans.java new file mode 100644 index 00000000..8ef14097 --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/KMeans.java @@ -0,0 +1,454 @@ + /* ============================================================================= + * + * kmeans.java + * + * ============================================================================= + * + * Description: + * + * Takes as input a file: + * ascii file: containing 1 data point per line + * binary file: first int is the number of objects + * 2nd int is the no. of features of each object + * + * This example performs a fuzzy c-means clustering on the data. Fuzzy clustering + * is performed using min to max clusters and the clustering that gets the best + * score according to a compactness and separation criterion are returned. + * + * + * Author: + * + * Wei-keng Liao + * ECE Department Northwestern University + * email: wkliao@ece.northwestern.edu + * + * + * Edited by: + * + * Jay Pisharath + * Northwestern University + * + * Chi Cao Minh + * Stanford University + * + * Port to Java version + * Alokika Dash + * University of California, Irvine + * + * ============================================================================= + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + +public class KMeans /*extends Thread*/ { + /** + * User input for max clusters + **/ + int max_nclusters; + + /** + * User input for min clusters + **/ + int min_nclusters; + + /** + * Check for Binary file + **/ + int isBinaryFile; + + /** + * Using zscore transformation for cluster center + * deviating from distribution's mean + **/ + int use_zscore_transform; + + /** + * Input file name used for clustering + **/ + String filename; + + /** + * Total number of threads + **/ + int nthreads; + + /** + * threshold until which kmeans cluster continues + **/ + float threshold; + + /** + * thread id + **/ + int threadid; + + /** + * Global arguments for threads + **/ + GlobalArgs g_args; + + /** + * Output: Number of best clusters + **/ + int best_nclusters; + + /** + * Output: Cluster centers + **/ + float[][] cluster_centres; + + public KMeans() { + max_nclusters = 13; + min_nclusters = 4; + isBinaryFile = 0; + use_zscore_transform = 1; + threshold = (float) 0.001; + best_nclusters=0; + } + + public KMeans(int threadid, GlobalArgs g_args) { + this.threadid = threadid; + this.g_args = g_args; + } + + public void run() { + while(true) { +// Barrier.enterBarrier(); + Normal.work(threadid, g_args); +// Barrier.enterBarrier(); + } + } + + /* ============================================================================= + * main + * ============================================================================= + */ + public static void main(String[] args) { + int nthreads; + int MAX_LINE_LENGTH = 1000000; /* max input is 400000 one digit input + spaces */ + + /** + * Read options fron the command prompt + **/ + KMeans kms = new KMeans(); + KMeans.parseCmdLine(args, kms); + nthreads = kms.nthreads; + + /* Initiate Barriers */ +// Barrier.setBarrier(nthreads); + + if (kms.max_nclusters < kms.min_nclusters) { + System.out.println("Error: max_clusters must be >= min_clusters\n"); + System.exit(0); + } + + float[][] buf; + float[][] attributes; + int numAttributes = 0; + int numObjects = 0; + + /* + * From the input file, get the numAttributes (columns in txt file) and numObjects (rows in txt file) + */ + if (kms.isBinaryFile == 1) { + System.out.println("TODO: Unimplemented Binary file option\n"); + System.exit(0); + } + + FileInputStream inputFile = new FileInputStream(kms.filename); + byte b[] = new byte[MAX_LINE_LENGTH]; + int n; + while ((n = inputFile.read(b)) != 0) { + for (int i = 0; i < n; i++) { + if (b[i] == '\n') + numObjects++; + } + } + inputFile.close(); + inputFile = new FileInputStream(kms.filename); + String line = null; + if((line = inputFile.readLine()) != null) { + int index = 0; + boolean prevWhiteSpace = true; + while(index < line.length()) { + char c = line.charAt(index++); + boolean currWhiteSpace = Character.isWhitespace(c); + if(prevWhiteSpace && !currWhiteSpace){ + numAttributes++; + } + prevWhiteSpace = currWhiteSpace; + } + } + inputFile.close(); + + /* Ignore the first attribute: numAttributes = 1; */ + numAttributes = numAttributes - 1; + System.out.println("numObjects= " + numObjects + " numAttributes= " + numAttributes); + + /* Allocate new shared objects and read attributes of all objects */ + buf = new float[numObjects][numAttributes]; + attributes = new float[numObjects][numAttributes]; + KMeans.readFromFile(inputFile, kms.filename, buf, MAX_LINE_LENGTH); + System.out.println("Finished Reading from file ......"); + + /* + * The core of the clustering + */ + + int nloops = 1; + int len = kms.max_nclusters - kms.min_nclusters + 1; + + KMeans[] km = new KMeans[nthreads]; + GlobalArgs g_args = disjoint GARGS new GlobalArgs(); + g_args.nthreads = nthreads; + + /* Create and Start Threads */ + for(int i = 1; i -n -t -i -nthreads \n"); + System.out.println( " -i filename: file containing data to be clustered\n"); + System.out.println( " -b input file is in binary format\n"); + System.out.println( " -m max_clusters: maximum number of clusters allowed\n"); + System.out.println( " -n min_clusters: minimum number of clusters allowed\n"); + System.out.println( " -z : don't zscore transform data\n"); + System.out.println( " -t threshold : threshold value\n"); + System.out.println( " -nthreads : number of threads\n"); + } + + /** + * readFromFile() + * Read attributes from the input file into an array + **/ + public static void readFromFile(FileInputStream inputFile, String filename, float[][] buf, int MAX_LINE_LENGTH) { + inputFile = new FileInputStream(filename); + int j; + int i = 0; + + byte b[] = new byte[MAX_LINE_LENGTH]; + int n; + byte oldbytes[]=null; + + + j = -1; + while ((n = inputFile.read(b)) != 0) { + int x=0; + + if (oldbytes!=null) { + //find space + boolean cr=false; + for (;x < n; x++) { + if (b[x] == ' ') + break; + if (b[x] == '\n') { + cr=true; + break; + } + } + byte newbytes[]=new byte[x+oldbytes.length]; + boolean isnumber=false; + for(int ii=0;ii='0'&&oldbytes[ii]<='9') + isnumber=true; + newbytes[ii]=oldbytes[ii]; + } + for(int ii=0;ii='0'&&b[ii]<='9') + isnumber=true; + newbytes[ii+oldbytes.length]=b[ii]; + } + if (x!=n) + x++; //skip past space or cr + if (isnumber) { + if (j>=0) { + buf[i][j]=(float)Double.parseDouble(new String(newbytes, 0, newbytes.length)); + } + j++; + } + if (cr) { + j=-1; + i++; + } + oldbytes=null; + } + + while (x < n) { + int y=x; + boolean cr=false; + boolean isnumber=false; + for(y=x;y='0')&&(b[y]<='9')) + isnumber=true; + if (b[y]==' ') + break; + if (b[y]=='\n') { + cr=true; + break; + } + } + if (y==n) { + //need to continue for another read + oldbytes=new byte[y-x]; + for(int ii=0;ii<(y-x);ii++) + oldbytes[ii]=b[ii+x]; + break; + } + + //otherwise x is beginning of character string, y is end + if (isnumber) { + if (j>=0) { + buf[i][j]=(float)Double.parseDouble(new String(b,x,y-x)); + } + j++; + } + if (cr) { + i++;//skip to next line + j = -1;//don't store line number + x=y;//skip to end of number + x++;//skip past return + } else { + x=y;//skip to end of number + x++;//skip past space + } + } + } + inputFile.close(); + } +} + +/* ============================================================================= + * + * End of kmeans.java + * + * ============================================================================= + */ diff --git a/Robust/src/Tests/disjoint/kmeans_new/Normal.java b/Robust/src/Tests/disjoint/kmeans_new/Normal.java new file mode 100644 index 00000000..70904099 --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/Normal.java @@ -0,0 +1,322 @@ +import Barrier; +import Common; +import GlobalArgs; +import Normal; +import Random; +import System; +import intwrapper; + +/* ============================================================================= + * + * normal.java + * -- Implementation of normal k-means clustering algorithm + * + * ============================================================================= + * + * Author: + * + * Wei-keng Liao + * ECE Department, Northwestern University + * email: wkliao@ece.northwestern.edu + * + * + * Edited by: + * + * Jay Pisharath + * Northwestern University. + * + * Chi Cao Minh + * Stanford University + * + * Alokika Dash + * University of California, Irvine + * Ported to Java + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + +public class Normal { + int CHUNK; + + public Normal() { + CHUNK = 3; + } + + /* + * ========================================================================== + * === work + * ================================================================== + * =========== + */ + public static void work(int myId, GlobalArgs args) { + + float[][] feature = args.feature; + int nfeatures = args.nfeatures; + int npoints = args.npoints; + int nclusters = args.nclusters; + int[] membership = args.membership; + float[][] clusters = args.clusters; + int[] new_centers_len = args.new_centers_len; + float[][] new_centers = args.new_centers; + float delta = 0.0f; + int start, stop; + + int CHUNK=500; + start = myId * CHUNK; + + while (start < npoints) { + stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints); + + sese parallel{ + int indexArrayLen=stop-start; + int indexArray[]=disjoint IDXARRAY new int[indexArrayLen]; + int pidx=0; + for (int i = start; i < stop; i++) { + int index = Common.common_findNearestPoint(feature[i], + nfeatures, + clusters, + nclusters); + indexArray[pidx]=index; + pidx++; + } + System.out.println("p"); + } + + sese serial{ + + int sidx=0; + for (int i = start; i < stop; i++) { + + int newIndex=indexArray[sidx]; + if (membership[i] != newIndex) { + delta += 1.0f; + } + + membership[i] = newIndex; + + new_centers_len[newIndex] = new_centers_len[newIndex] + 1; + for (int j = 0; j < nfeatures; j++) { + new_centers[newIndex][j] = new_centers[newIndex][j] + feature[i][j]; + } + + sidx++; + } + + } + + + if (start + CHUNK < npoints) { +// atomic { + start = args.global_i; + args.global_i = start + CHUNK; +// } + } else { + break; + } + } + + +/* + while (start < npoints) { + stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints); + + for (int i = start; i < stop; i++) { + + sese parallel{ + int index = Common.common_findNearestPoint(feature[i], + nfeatures, + clusters, + nclusters); + } + + sese serial{ + + // If membership changes, increase delta by 1. + // membership[i] cannot be changed by other threads + if (membership[i] != index) { + delta += 1.0f; + } + + // Assign the membership to object i + // membership[i] can't be changed by other thread + membership[i] = index; + + // Update new cluster centers : sum of objects located within + new_centers_len[index] = new_centers_len[index] + 1; + for (int j = 0; j < nfeatures; j++) { + new_centers[index][j] = new_centers[index][j] + feature[i][j]; + } + } + + } + + // Update task queue + if (start + CHUNK < npoints) { + start = args.global_i; + args.global_i = start + CHUNK; + } else { + break; + } + }//end of while +*/ + args.global_delta = args.global_delta + delta; + } + + /* + * ========================================================================== + * === normal_exec + * ========================================================== + * =================== + */ + public float[][] normal_exec(int nthreads, float[][] feature, /* + * in: + * [npoints][ + * nfeatures] + */ + int nfeatures, int npoints, int nclusters, float threshold, + int[] membership, Random randomPtr, /* out: [npoints] */ + GlobalArgs args) { + float delta; + float[][] clusters; /* out: [nclusters][nfeatures] */ + + /* Allocate space for returning variable clusters[] */ + clusters = new float[nclusters][nfeatures]; + + /* Randomly pick cluster centers */ + for (int i = 0; i < nclusters; i++) { + int n = (int) (randomPtr.random_generate() % npoints); + for (int j = 0; j < nfeatures; j++) { + clusters[i][j] = feature[n][j]; + } + } + + for (int i = 0; i < npoints; i++) { + membership[i] = -1; + } + + /* + * Need to initialize new_centers_len and new_centers[0] to all 0. + * Allocate clusters on different cache lines to reduce false sharing. + */ + int[] new_centers_len = new int[nclusters]; + + float[][] new_centers = new float[nclusters][nfeatures]; + + int loop = 0; + + long start = System.currentTimeMillis(); + do { + delta = 0.0f; + + args.feature = feature; + args.nfeatures = nfeatures; + args.npoints = npoints; + args.nclusters = nclusters; + args.membership = membership; + args.clusters = clusters; + args.new_centers_len = new_centers_len; + args.new_centers = new_centers; + + args.global_i = nthreads * CHUNK; + args.global_delta = delta; + + // Work in parallel with other threads + thread_work(args); + System.out.println("RRRR"); + delta = args.global_delta; + + /* Replace old cluster centers with new_centers */ + for (int i = 0; i < nclusters; i++) { + for (int j = 0; j < nfeatures; j++) { + if (new_centers_len[i] > 0) { + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; + } + new_centers[i][j] = (float) 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + + delta /= npoints; + + } while ((delta > threshold) && (loop++ < 500)); + long stop = System.currentTimeMillis(); + args.global_time += (stop - start); + + return clusters; + } + + /** + * Work done by primary thread in parallel with other threads + **/ + void thread_work(GlobalArgs args) { + Normal.work(0, args); // threadId = 0 because primary thread + } +} + +/* + * ============================================================================= + * + * End of normal.java + * + * ============================================================================= + */ + + diff --git a/Robust/src/Tests/disjoint/kmeans_new/Random.java b/Robust/src/Tests/disjoint/kmeans_new/Random.java new file mode 100644 index 00000000..611592b2 --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/Random.java @@ -0,0 +1,102 @@ +public class Random { + int[] mt; + int mti; + int RANDOM_DEFAULT_SEED; + /* period parameter */ + int N; + int M; + int MATRIX_A; + int UPPER_MASK; + int LOWER_MASK; + int[] mag01; + + public Random() { + RANDOM_DEFAULT_SEED = 0; + N = 624; + M = 397; + mt = new int[N]; + mti = N; + MATRIX_A = 0x9908b0df; /* constant vector a */ + UPPER_MASK = 0x80000000; /* most significant w-r bits */ + LOWER_MASK = 0x7fffffff; /* least significant r bits */ + mag01 = new int[2]; + mag01[0] = 0x0; + mag01[1] = MATRIX_A; + + } + + public void random_alloc() { + init_genrand(this.RANDOM_DEFAULT_SEED); + } + + /* initializes mt[N] with a seed */ + public void init_genrand(int s) { + mt[0]= s & 0xFFFFFFFF; + for (int mti=1; mti> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xFFFFFFFF; + /* for >32 bit machines */ + } + this.mti=mti; + } + + public void random_seed(int seed) { + init_genrand(seed); + } + + public int random_generate() { + return genrand_int32(); + } + + public int posrandom_generate() { + int r=genrand_int32(); + if (r>0) + return r; + else + return -r; + } + + public int genrand_int32() { + int y; + int mti = this.mti; + + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= 624) { /* generate N words at one time */ + int kk; + int[] mt = this.mt; + + if (mti == 624+1) /* if init_genrand() has not been called, */ + init_genrand(5489); /* a default initial seed is used */ + + for (kk=0;kk<(624-397);kk++) { + y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff); + mt[kk] = mt[kk+397] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df); + } + for (;kk<(624-1);kk++) { + y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff); + mt[kk] = mt[kk+(397-624)] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df); + } + y = (mt[624-1]&0x80000000)|(mt[0]&0x7fffffff); + mt[624-1] = mt[397-1] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df); + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680; + y ^= (y << 15) & 0xefc60000; + y ^= (y >> 18); + + this.mti = mti; + + return y; + } +} diff --git a/Robust/src/Tests/disjoint/kmeans_new/RandomExact.java b/Robust/src/Tests/disjoint/kmeans_new/RandomExact.java new file mode 100644 index 00000000..1862c3b4 --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/RandomExact.java @@ -0,0 +1,87 @@ +public class Random { + long[] mt; + int mti; + int RANDOM_DEFAULT_SEED; + /* period parameter */ + + + public Random() { + RANDOM_DEFAULT_SEED = 0; + mt = new long[624]; + } + + public void random_alloc() { + init_genrand(this.RANDOM_DEFAULT_SEED); + } + + /* initializes mt[N] with a seed */ + public void init_genrand(int s) { + mt[0]= ((long)s) & 0xFFFFFFFFL; + for (int mti=1; mti<624; mti++) { + mt[mti] = (1812433253L * (mt[mti-1] ^ (mt[mti-1] >> 30)) + ((long)mti)); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xFFFFFFFFL; + /* for >32 bit machines */ + } + this.mti=624; + } + + public void random_seed(int seed) { + init_genrand(seed); + } + + public long random_generate() { + long x= genrand_int32()&0xFFFFFFFFL; + return x; + } + + public long posrandom_generate() { + long r=genrand_int32(); + if (r>0) + return r; + else + return -r; + } + + public long genrand_int32() { + long y; + int mti = this.mti; + long[] mt = this.mt; + + if (mti >= 624) { /* generate N words at one time */ + int kk; + + if (mti == 624+1) { /* if init_genrand() has not been called, */ + init_genrand(5489); /* a default initial seed is used */ + mti=this.mti; + } + for (kk=0;kk<(624-397);kk++) { + y = (mt[kk]&0x80000000L)|(mt[kk+1]&0x7fffffffL); + mt[kk] = mt[kk+397] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0L:0x9908b0dfL); + } + for (;kk<(624-1);kk++) { + y = (mt[kk]&0x80000000L)|(mt[kk+1]&0x7fffffffL); + mt[kk] = mt[kk+(397-624)] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0L:0x9908b0dfL); + } + y = (mt[624-1]&0x80000000L)|(mt[0]&0x7fffffffL); + mt[624-1] = mt[397-1] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0L:0x9908b0dfL); + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680L; + y ^= (y << 15) & 0xefc60000L; + y ^= (y >> 18); + + this.mti = mti; + + return y; + } +} diff --git a/Robust/src/Tests/disjoint/kmeans_new/makefile b/Robust/src/Tests/disjoint/kmeans_new/makefile new file mode 100644 index 00000000..08cf218e --- /dev/null +++ b/Robust/src/Tests/disjoint/kmeans_new/makefile @@ -0,0 +1,35 @@ +BUILDSCRIPT=../../../buildscript +MAINCLASS=KMeans +SRC=KMeans.java \ + Random.java \ + Cluster.java \ + Normal.java \ + Common.java \ + GlobalArgs.java + +#DEBUGFLAGS= -disjoint-debug-callsite MDRunner t3 100 +#DEBUGFLAGS= -disjoint-debug-callsite calcGoodFeature calcGoodFeatureTask 100 +#DEBUGFLAGS= -disjoint-debug-callsite getRows calcGoodFeature 4 +#DEBUGFLAGS= -disjoint-debug-callsite setKMeans t3 500 + +#SNAPFLAGS= -disjoint-debug-snap-method calcGoodFeatureTask 5 10 true +#SNAPFLAGS= -disjoint-debug-snap-method calcGoodFeature 5 1 true + +#SNAPFLAGS= -disjoint-debug-snap-method t3 5 20 true + +BSFLAGS= -justanalyze -disjoint -disjoint-k 1 -disjoint-write-dots all -disjoint-alias-file aliases.txt normal -enable-assertions -mainclass ${MAINCLASS} + +all: + $(BUILDSCRIPT) $(BSFLAGS) $(DEBUGFLAGS) $(SNAPFLAGS) ${SRC} + +clean: + rm -f *.bin + rm -fr tmpbuilddirectory + rm -f *~ + rm -f *.dot + rm -f *.png + rm -f *.aux + rm -f *.log + rm -f *.pdf + rm -f aliases.txt + rm -f tabResults.tex -- 2.34.1