*/
public class KMeans extends Thread {
+ /**
+ * User input for max clusters
+ **/
int max_nclusters;
+
+ /**
+ * User input for min clusters
+ **/
int min_nclusters;
+
+ /**
+ * Flag set to 1 when the input file is a binary file
+ **/
int isBinaryFile;
+
+ /**
+ * Flag for using the z-score transformation, which expresses
+ * cluster centers as deviations from the distribution's mean
+ **/
int use_zscore_transform;
+
+ /**
+ * Input file name used for clustering
+ **/
String filename;
+
+ /**
+ * Total number of threads
+ **/
int nthreads;
+
+ /**
+ * Threshold that k-means clustering keeps iterating until it reaches
+ **/
double threshold;
- int threadid; /* my thread id */
- /* Global arguments for threads */
+ /**
+ * thread id
+ **/
+ int threadid;
+
+ /**
+ * Global arguments for threads
+ **/
GlobalArgs g_args;
/**
*/
public static void main(String[] args) {
int nthreads;
+ int MAX_LINE_LENGTH = 1000000; /* max input is 400000 one digit input + spaces */
/**
* Read options from the command prompt
KMeans kms = new KMeans();
KMeans.parseCmdLine(args, kms);
nthreads = kms.nthreads;
+
/* Initiate Barriers */
Barrier.setBarrier(nthreads);
int numObjects = 0;
/*
- * From the input file, get the numAttributes and numObjects
+ * From the input file, get the numAttributes (columns in txt file) and numObjects (rows in txt file)
*/
if (kms.isBinaryFile == 1) {
System.out.println("TODO: Unimplemented Binary file option\n");
System.exit(0);
}
+
FileInputStream inputFile = new FileInputStream(kms.filename);
- String line = null;
- while((line = inputFile.readLine()) != null) {
- numObjects++;
+ byte b[] = new byte[MAX_LINE_LENGTH];
+ int n;
+ while ((n = inputFile.read(b)) != 0) {
+ for (int i = 0; i < n; i++) {
+ if (b[i] == '\n')
+ numObjects++;
+ }
}
+ inputFile.close();
inputFile = new FileInputStream(kms.filename);
+ String line = null;
if((line = inputFile.readLine()) != null) {
int index = 0;
boolean prevWhiteSpace = true;
prevWhiteSpace = currWhiteSpace;
}
}
+ inputFile.close();
- /* Ignore the id (first attribute): numAttributes = 1; */
- numAttributes = numAttributes - 1; //
+ /* Ignore the first attribute: numAttributes = 1; */
+ numAttributes = numAttributes - 1;
System.out.println("numObjects= " + numObjects + " numAttributes= " + numAttributes);
/* Allocate new shared objects and read attributes of all objects */
buf = new double[numObjects][numAttributes];
attributes = new double[numObjects][numAttributes];
- KMeans.readFromFile(inputFile, kms.filename, buf);
+ KMeans.readFromFile(inputFile, kms.filename, buf, MAX_LINE_LENGTH);
+ System.out.println("Finished Reading from file ......");
/*
* The core of the clustering
/**
* readFromFile()
- * Read attributes into an array
+ * Read attributes from the input file into an array
**/
- public static void readFromFile(FileInputStream inputFile, String filename, double[][] buf) {
+ public static void readFromFile(FileInputStream inputFile, String filename, double[][] buf, int MAX_LINE_LENGTH) {
inputFile = new FileInputStream(filename);
- int i = 0;
int j;
- String line = null;
- while((line = inputFile.readLine()) != null) {
- int index=0;
- StringBuffer buffer = new StringBuffer();
+ int i = 0;
+
+ byte b[] = new byte[MAX_LINE_LENGTH];
+ int n;
+ while ((n = inputFile.read(b)) != 0) {
j = 0;
boolean skipFirstVar = true;
- while(index < line.length()) {
- char c = line.charAt(index++);
- if(c != ' ') {
- buffer.append(c);
+ StringBuffer buffer = new StringBuffer();
+ for (int x = 0; x < n; x++) {
+ if (b[x] == '\n') {
+ i++;
+ j = 0;
+ buffer = new StringBuffer();
+ skipFirstVar = true;
+ continue;
+ }
+ if (b[x] != ' ') {
+ buffer.append((char)b[x]);
} else {
if(skipFirstVar) {
skipFirstVar = false;
j++;
}
}
- i++;
}
- }
+ inputFile.close();
+ }
/**
* Convert a string into double