change doubles to floats in 2DFFT - ecoop submission

[IRC.git] / Robust / src / Benchmarks / Prefetch / 2DFFT / dsm / fft2d.java
diff --git a/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java b/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java

index dd6a7116c1bdbd3a2da87e62e34818e2b9a1b4ea..0f34980dd8e7bc87f1575765b34cae088c3c47d8 100644 (file)
--- a/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java
+++ b/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java
@@ -23,28 +23,31 @@ public class fft2d extends Thread {
      fft1d fft1, fft2;
      Barrier barr;
      barr = new Barrier("128.195.136.162");
-    double tempdataRe[][];
-    double tempdataIm[][];
+    float tempdataRe[][];
+    float tempdataIm[][];
      int rowlength, columnlength;
-    int start, end;
+    int start, end, nmatrix;
  
      // Calculate FFT for each row of the data.
      atomic {
        rowlength = data1.M;
        columnlength = data1.N;
-      tempdataRe = data1.dataRe;
-      tempdataIm = data1.dataIm;
+      nmatrix = data1.numMatrix;
        start = x0;
        end = x1;
        fft1 = new fft1d(columnlength);
        fft2 = new fft1d(rowlength);
        int l=8;
-      for (int i = x0; i < x1; i++,l++) {
-       //input of FFT
-       double inputRe[] = tempdataRe[i]; //local array
-       double inputIm[] = tempdataIm[i];
-       fft(fft1, inputRe, inputIm);
-      } //end of for
+      for(int z=0; z<nmatrix; z++) {
+        tempdataRe = data1.dataRe[z];
+        tempdataIm = data1.dataIm[z];
+        for (int i = start; i < end; i++,l++) {
+          //input of FFT
+          float inputRe[] = tempdataRe[i]; //local array //remote reads here for NPNC
+          float inputIm[] = tempdataIm[i];//remote reads here for NPNC
+          fft(fft1, inputRe, inputIm);
+        } //end of for
+      }
      }
  
      //Start Barrier
@@ -53,7 +56,11 @@ public class fft2d extends Thread {
      // Transpose data.
      if (start == 0) {
        atomic {
-        transpose(tempdataRe, tempdataIm, rowlength, columnlength);
+        for(int z=0; z<nmatrix; z++) {
+          tempdataRe = data1.dataRe[z];
+          tempdataIm = data1.dataIm[z];
+          transpose(tempdataRe, tempdataIm, rowlength, columnlength);
+        }
        }
      }
  
@@ -61,26 +68,28 @@ public class fft2d extends Thread {
      Barrier.enterBarrier(barr);
  
      // Calculate FFT for each column of the data.
-    double transtempRe[][];
-    double transtempIm[][];
+    float transtempRe[][];
+    float transtempIm[][];
      atomic {
-      transtempRe = data1.dataRe;
-      transtempIm = data1.dataIm;
-      int l=8;
-      for (int j = start; j < end; j++,l++) {
-       //input of FFT
-       double inputRe[] = transtempRe[j]; //local array
-       double inputIm[] = transtempIm[j];
-       fft(fft2, inputRe, inputIm);
-      } //end of fft2 for
+      for(int z=0; z<nmatrix; z++) {
+        transtempRe = data1.dataRe[z];
+        transtempIm = data1.dataIm[z];
+        int l=8;
+        for (int j = start; j < end; j++,l++) {
+          //input of FFT
+          float inputRe[] = transtempRe[j]; //local array //Remote reads here
+          float inputIm[] = transtempIm[j]; //remote reads here
+          fft(fft2, inputRe, inputIm);
+        } //end of fft2 for
+      }
      }
    } //end of run
  
-  public void transpose(double[][] tempdataRe, double[][] tempdataIm, int rowlength, int columnlength) {
+  public void transpose(float[][] tempdataRe, float[][] tempdataIm, int rowlength, int columnlength) {
      for(int i = 0; i<rowlength; i++) {
-      double tRe[] = tempdataRe[i];
-      double tIm[] = tempdataIm[i];
-      double a;
+      float tRe[] = tempdataRe[i];
+      float tIm[] = tempdataIm[i];
+      float a;
  
        for(int j = 0; j<i; j++) {
         a=tempdataRe[j][i];
@@ -95,15 +104,19 @@ public class fft2d extends Thread {
  
    public static void main(String[] args) {
      int NUM_THREADS = 1;
+    int NUM_MATRIX = 1;
      int SIZE = 800;
      int inputWidth = 10;
      if(args.length>0) {
        NUM_THREADS=Integer.parseInt(args[0]);
-      if(args.length > 1)
+      if(args.length > 1){
         SIZE = Integer.parseInt(args[1]);
+    if(args.length > 2)
+      NUM_MATRIX = Integer.parseInt(args[2]);
+      }
      }
  
-    System.printString("Num threads = " + NUM_THREADS + " SIZE= " + SIZE + "\n");
+    System.printString("Num threads = " + NUM_THREADS + " SIZE= " + SIZE + " NUM_MATRIX= " + NUM_MATRIX +"\n");
  
      int[] mid = new int[8];
      mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
@@ -129,7 +142,7 @@ public class fft2d extends Thread {
      fft2d[] myfft2d;
      atomic {
        // Set up data for FFT transform
-      data1 = global new Matrix(SIZE, SIZE);
+      data1 = global new Matrix(SIZE, SIZE, NUM_MATRIX);
        data1.setValues(); //Input Matrix
        myfft2d = global new fft2d[NUM_THREADS];
        int increment = SIZE/NUM_THREADS;
@@ -171,21 +184,19 @@ public class fft2d extends Thread {
      System.printString("2DFFT done! \n");
    }
  
-  public static void fft(fft1d myfft, double inputRe[], double inputIm[]) {
+  public static void fft(fft1d myfft, float inputRe[], float inputIm[]) {
      //output of FFT
-    double outputRe[] = myfft.outputRe;
-    double outputIm[] = myfft.outputIm;
+    float outputRe[] = myfft.outputRe;
+    float outputIm[] = myfft.outputIm;
      // intermediate results
-    double temRe[] = myfft.temRe;
-    double temIm[] = myfft.temIm;
+    float temRe[] = myfft.temRe;
+    float temIm[] = myfft.temIm;
      //Permute() operation
      permute(myfft, outputRe, outputIm, inputRe, inputIm);
  
-    //System.printString("ready to twiddle");
      for (int factorIndex = 0; factorIndex < myfft.NumofFactors; factorIndex++)
        twiddle(factorIndex, myfft, temRe, temIm, outputRe, outputIm);
  
-    //System.printString("ready to copy");
      // Copy the output[] data to input[], so the output can be
      // returned in the input array.
      for (int i = 0; i < myfft.N; i++) {
@@ -194,7 +205,7 @@ public class fft2d extends Thread {
      }
    }
  
-  private static void permute(fft1d myfft, double[] outputRe, double[] outputIm, double[] inputRe, double[] inputIm) {
+  private static void permute(fft1d myfft, float[] outputRe, float[] outputIm, float[] inputRe, float[] inputIm) {
      int count[] = new int[myfft.MaxFactorsNumber];
      int j;
      int k = 0;
@@ -221,28 +232,27 @@ public class fft2d extends Thread {
      outputIm[myfft.N - 1] = inputIm[myfft.N - 1];
    }   // End of function permute().
  
-  private static void twiddle(int factorIndex, fft1d myfft, double[] temRe, double[] temIm,
-                              double[] outputRe, double[] outputIm) {
+  private static void twiddle(int factorIndex, fft1d myfft, float[] temRe, float[] temIm,
+                              float[] outputRe, float[] outputIm) {
      // Get factor data.
      int sofarRadix = myfft.sofar[factorIndex];
      int radix = myfft.factors[factorIndex];
      int remainRadix = myfft.remain[factorIndex];
  
-    double tem;   // Temporary variable to do data exchange.
+    float tem;   // Temporary variable to do data exchange.
  
-    double W = 2 * (double) Math.setPI() / (sofarRadix * radix);
-    double cosW = (double) Math.cos(W);
-    double sinW = -(double) Math.sin(W);
+    float W = 2 * (float) Math.setPI() / (sofarRadix * radix);
+    float cosW = (float) Math.cos(W);
+    float sinW = -(float) Math.sin(W);
  
-    double twiddleRe[] = new double[radix];
-    double twiddleIm[] = new double[radix];
-    double twRe = 1.0f, twIm = 0f;
+    float twiddleRe[] = new float[radix];
+    float twiddleIm[] = new float[radix];
+    float twRe = 1.0f, twIm = 0f;
  
      //Initialize twiddle addBk.address variables.
      int dataOffset = 0, groupOffset = 0, address = 0;
  
      for (int dataNo = 0; dataNo < sofarRadix; dataNo++) {
-      //System.printString("datano="+dataNo);
        if (sofarRadix > 1) {
         twiddleRe[0] = 1.0f;
         twiddleIm[0] = 0.0f;
@@ -257,7 +267,6 @@ public class fft2d extends Thread {
         twRe = tem;
        }
        for (int groupNo = 0; groupNo < remainRadix; groupNo++) {
-       //System.printString("groupNo="+groupNo);
         if ((sofarRadix > 1) && (dataNo > 0)) {
           temRe[0] = outputRe[address];
           temIm[0] = outputIm[address];
@@ -272,14 +281,11 @@ public class fft2d extends Thread {
           } while (blockIndex < radix);
         } else {
           for (int i = 0; i < radix; i++) {
-           //System.printString("temRe.length="+temRe.length);
-           //System.printString("i = "+i);
             temRe[i] = outputRe[address];
             temIm[i] = outputIm[address];
             address += sofarRadix;
           }
         }
-       //System.printString("radix="+radix);
         if(radix == 2) {
           tem = temRe[0] + temRe[1];
           temRe[1] = temRe[0] - temRe[1];
@@ -288,17 +294,17 @@ public class fft2d extends Thread {
           temIm[1] = temIm[0] - temIm[1];
           temIm[0] = tem;
         } else if( radix == 3) {
-         double t1Re = temRe[1] + temRe[2];
-         double t1Im = temIm[1] + temIm[2];
+         float t1Re = temRe[1] + temRe[2];
+         float t1Im = temIm[1] + temIm[2];
           temRe[0] = temRe[0] + t1Re;
           temIm[0] = temIm[0] + t1Im;
  
-         double m1Re = myfft.cos2to3PI * t1Re;
-         double m1Im = myfft.cos2to3PI * t1Im;
-         double m2Re = myfft.sin2to3PI * (temIm[1] - temIm[2]);
-         double m2Im = myfft.sin2to3PI * (temRe[2] - temRe[1]);
-         double s1Re = temRe[0] + m1Re;
-         double s1Im = temIm[0] + m1Im;
+         float m1Re = myfft.cos2to3PI * t1Re;
+         float m1Im = myfft.cos2to3PI * t1Im;
+         float m2Re = myfft.sin2to3PI * (temIm[1] - temIm[2]);
+         float m2Im = myfft.sin2to3PI * (temRe[2] - temRe[1]);
+         float s1Re = temRe[0] + m1Re;
+         float s1Im = temIm[0] + m1Im;
  
           temRe[1] = s1Re + m2Re;
           temIm[1] = s1Im + m2Im;
@@ -330,9 +336,9 @@ public class fft2d extends Thread {
    } //twiddle operation
  
    // The two arguments dataRe[], dataIm[] are mainly for use in fft8();
-  private static void fft4(double dataRe[], double dataIm[]) {
-    double t1Re,t1Im, t2Re,t2Im;
-    double m2Re,m2Im, m3Re,m3Im;
+  private static void fft4(float dataRe[], float dataIm[]) {
+    float t1Re,t1Im, t2Re,t2Im;
+    float m2Re,m2Im, m3Re,m3Im;
  
      t1Re = dataRe[0] + dataRe[2];
      t1Im = dataIm[0] + dataIm[2];
@@ -355,10 +361,10 @@ public class fft2d extends Thread {
    }   // End of function fft4().
  
    // The two arguments dataRe[], dataIm[] are mainly for use in fft10();
-  private static void fft5(fft1d myfft, double dataRe[], double dataIm[]) {
-    double t1Re,t1Im, t2Re,t2Im, t3Re,t3Im, t4Re,t4Im, t5Re,t5Im;
-    double m1Re,m1Im, m2Re,m2Im, m3Re,m3Im, m4Re,m4Im, m5Re,m5Im;
-    double s1Re,s1Im, s2Re,s2Im, s3Re,s3Im, s4Re,s4Im, s5Re,s5Im;
+  private static void fft5(fft1d myfft, float dataRe[], float dataIm[]) {
+    float t1Re,t1Im, t2Re,t2Im, t3Re,t3Im, t4Re,t4Im, t5Re,t5Im;
+    float m1Re,m1Im, m2Re,m2Im, m3Re,m3Im, m4Re,m4Im, m5Re,m5Im;
+    float s1Re,s1Im, s2Re,s2Im, s3Re,s3Im, s4Re,s4Im, s5Re,s5Im;
  
      t1Re = dataRe[1] + dataRe[4];
      t1Im = dataIm[1] + dataIm[4];
@@ -406,12 +412,12 @@ public class fft2d extends Thread {
      dataIm[4] = s2Im - s3Im;
    }   // End of function fft5().
  
-  private static void fft8(fft1d myfft, double[] temRe, double[] temIm) {
-    double data1Re[] = new double[4];
-    double data1Im[] = new double[4];
-    double data2Re[] = new double[4];
-    double data2Im[] = new double[4];
-    double tem;
+  private static void fft8(fft1d myfft, float[] temRe, float[] temIm) {
+    float data1Re[] = new float[4];
+    float data1Im[] = new float[4];
+    float data2Re[] = new float[4];
+    float data2Im[] = new float[4];
+    float tem;
  
      // To improve the speed, use direct assignment instead of a for loop here.
      data1Re[0] = temRe[0];
@@ -464,11 +470,11 @@ public class fft2d extends Thread {
      temIm[7] = data1Im[3] - data2Im[3];
    }   // End of function fft8().
  
-  private static void fft10(fft1d myfft, double[] temRe, double[] temIm) {
-    double data1Re[] = new double[5];
-    double data1Im[] = new double[5];
-    double data2Re[] = new double[5];
-    double data2Im[] = new double[5];
+  private static void fft10(fft1d myfft, float[] temRe, float[] temIm) {
+    float data1Re[] = new float[5];
+    float data1Im[] = new float[5];
+    float data2Re[] = new float[5];
+    float data2Im[] = new float[5];
  
      // To improve the speed, use direct assignment instead of a for loop here.
      data1Re[0] = temRe[0];
@@ -519,13 +525,13 @@ public class fft2d extends Thread {
      temIm[9] = data1Im[4] - data2Im[4];
    }   // End of function fft10().
  
-  private static void fftPrime(int radix, double[] temRe, double[] temIm) {
+  private static void fftPrime(int radix, float[] temRe, float[] temIm) {
      // Initial WRe, WIm.
-    double W = 2 * (double) Math.setPI() / radix;
-    double cosW = (double) Math.cos(W);
-    double sinW = -(double) Math.sin(W);
-    double WRe[] = new double[radix];
-    double WIm[] = new double[radix];
+    float W = 2 * (float) Math.setPI() / radix;
+    float cosW = (float) Math.cos(W);
+    float sinW = -(float) Math.sin(W);
+    float WRe[] = new float[radix];
+    float WIm[] = new float[radix];
  
      WRe[0] = 1;
      WIm[0] = 0;
@@ -538,14 +544,14 @@ public class fft2d extends Thread {
      }
  
      // FFT of prime length data, using DFT, can be improved in the future.
-    double rere, reim, imre, imim;
+    float rere, reim, imre, imim;
      int j, k;
      int max = (radix + 1) / 2;
  
-    double tem1Re[] = new double[max];
-    double tem1Im[] = new double[max];
-    double tem2Re[] = new double[max];
-    double tem2Im[] = new double[max];
+    float tem1Re[] = new float[max];
+    float tem1Im[] = new float[max];
+    float tem2Re[] = new float[max];
+    float tem2Im[] = new float[max];
  
      for (j = 1; j < max; j++) {
        tem1Re[j] = temRe[j] + temRe[radix - j];