latest changes to 2DFFT and LUFact benchmark
author: adash <adash>
Wed, 12 Nov 2008 04:09:34 +0000 (04:09 +0000)
committer: adash <adash>
Wed, 12 Nov 2008 04:09:34 +0000 (04:09 +0000)
Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java
Robust/src/Benchmarks/Prefetch/LUFact/dsm/JGFLUFactBench.java
Robust/src/Benchmarks/Prefetch/LUFact/dsm/JGFLUFactBenchSizeA.java
Robust/src/Benchmarks/Prefetch/LUFact/dsm/LinpackRunner.java
Robust/src/Benchmarks/Prefetch/LUFact/dsm/makefile

index 977241388bfe0bbaf0385dfd5eb5ff95fbb8e0d6..3c7a6563a477a1836fe30f45a8b5d0c53e5af259 100644 (file)
@@ -53,14 +53,7 @@ public class fft2d extends Thread {
     // Tranpose data.
     if (start == 0) {
       atomic {
-       for(int i = 0; i<rowlength; i++) {
-         double tRe[] = tempdataRe[i];
-         double tIm[] = tempdataIm[i];
-         for(int j = 0; j<columnlength; j++) {
-           data2.dataRe[j][i] = tRe[j];
-           data2.dataIm[j][i] = tIm[j];
-         }
-       }
+       transpose(tempdataRe,tempdataIm, data2.dataRe,data2.dataIm, rowlength, columnlength);
       }
     }
 
@@ -82,6 +75,18 @@ public class fft2d extends Thread {
     }
   } //end of run
 
+  public void transpose(double[][] tempdataRe, double[][] tempdataIm, double[][] outputRe,
+                        double[][] outputIm, int rowlength, int columnlength) {
+    for(int i = 0; i<rowlength; i++) {
+      double tRe[] = tempdataRe[i];
+      double tIm[] = tempdataIm[i];
+      for(int j = 0; j<columnlength; j++) {
+       outputRe[j][i] = tRe[j];
+       outputIm[j][i] = tIm[j];
+      }
+    }
+  }
+
   public static void main(String[] args) {
     int NUM_THREADS = 1;
     int SIZE = 800;
@@ -213,7 +218,7 @@ public class fft2d extends Thread {
   }   // End of function permute().
 
   private static void twiddle(int factorIndex, fft1d myfft, double[] temRe, double[] temIm,
-                       double[] outputRe, double[] outputIm) {
+                              double[] outputRe, double[] outputIm) {
     // Get factor data.
     int sofarRadix = myfft.sofar[factorIndex];
     int radix = myfft.factors[factorIndex];
index 328c7a5dc934e847d8cf9754d6f8d27454d5ead9..d41f4d0d484137780a0ab2ec60b011c68da5b833 100644 (file)
@@ -50,6 +50,7 @@ public class JGFLUFactBench {
     lda = ldaa + 1;
 
     a = global new double[ldaa][lda];
+    //System.printString("row_ldaa = "+ldaa + "column_lda= "+lda+ "\n");
     b = global new double [ldaa];
     x = global new double [ldaa];
     ipvt = global new int [ldaa];
@@ -65,37 +66,50 @@ public class JGFLUFactBench {
       numthreads = lub.nthreads;
     }
 
+    int[] mid = new int[4];
+    mid[0] = (128<<24)|(195<<16)|(175<<8)|84; //dw-10
+    mid[1] = (128<<24)|(195<<16)|(175<<8)|85; //dw-11
+    mid[2] = (128<<24)|(195<<16)|(175<<8)|86; //dw-12
+    mid[3] = (128<<24)|(195<<16)|(175<<8)|87; //dw-13
+
     /* spawn threads */
     LinpackRunner[] thobjects;
-    Barrier br;
+    BarrierServer mybarr;
     atomic {
       thobjects = global new LinpackRunner[numthreads];
-      br = global new Barrier(numthreads);
+      mybarr = global new BarrierServer(numthreads);
     }
 
+    mybarr.start(mid[0]);
     //JGFInstrumentor.startTimer("Section2:LUFact:Kernel", instr.timers);  
     LinpackRunner tmp;
-    int[] mid = new int[4];
-    mid[0] = (128<<24)|(195<<16)|(175<<8)|73;
-    mid[1] = (128<<24)|(195<<16)|(175<<8)|69;
-    mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
-    mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
-    for(int i=1;i<numthreads;i++) {
+    
+    boolean waitfordone=true;
+    while(waitfordone) {
+      atomic {
+        //System.printString("HERE #1\n");
+        if (mybarr.done)
+          waitfordone=false;
+      }
+    }
+
+    for(int i=0;i<numthreads;i++) {
       atomic {
-        thobjects[i] = global new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
+        thobjects[i] = global new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,lub.nthreads);
         tmp = thobjects[i];
       }
       tmp.start(mid[i]);
     }
 
+    /*
     atomic {
-      thobjects[0] = global new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
+      thobjects[0] = global new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,lub.nthreads);
       tmp = thobjects[0];
     }
     tmp.start(mid[0]);
     tmp.join();
-
-    for(int i=1;i<numthreads;i++) {
+*/
+    for(int i=0;i<numthreads;i++) {
       atomic {
         tmp = thobjects[i];
       }
@@ -103,6 +117,7 @@ public class JGFLUFactBench {
     }
 
     atomic {
+      //System.printString("HERE #2\n");
       lub.dgesl(lub.a,lub.lda,lub.n,lub.ipvt,lub.b,0);
     }
 
index be55624328040086e19f672abc044a10523da011..153304c2aa7c671a9b5780d6b367b94d9d4cbef2 100644 (file)
@@ -57,6 +57,7 @@ public class JGFLUFactBenchSizeA {
     }
     JGFInstrumentor.addOpsToTimer("Section2:LUFact:Kernel", ((long)ops)/1.0e06, instr.timers);
     JGFInstrumentor.printTimer("Section2:LUFact:Kernel", instr.timers); 
+    System.printString("Finished\n");
   }
 }
 
index 6e499d1c4a072bf5c100fcded94b75f85e3459ae..730fa162ec7c2e1bc60fa3bd0d7e18cbf8706114 100644 (file)
@@ -1,36 +1,34 @@
 /**************************************************************************
-*                                                                         *
-*         Java Grande Forum Benchmark Suite - Thread Version 1.0          *
-*                                                                         *
-*                            produced by                                  *
-*                                                                         *
-*                  Java Grande Benchmarking Project                       *
-*                                                                         *
-*                                at                                       *
-*                                                                         *
-*                Edinburgh Parallel Computing Centre                      *
-*                                                                         *
-*                email: epcc-javagrande@epcc.ed.ac.uk                     *
-*                                                                         *
-*                                                                         *
-*      This version copyright (c) The University of Edinburgh, 2001.      *
-*                         All rights reserved.                            *
-*                                                                         *
-**************************************************************************/
+ *                                                                         *
+ *         Java Grande Forum Benchmark Suite - Thread Version 1.0          *
+ *                                                                         *
+ *                            produced by                                  *
+ *                                                                         *
+ *                  Java Grande Benchmarking Project                       *
+ *                                                                         *
+ *                                at                                       *
+ *                                                                         *
+ *                Edinburgh Parallel Computing Centre                      *
+ *                                                                         *
+ *                email: epcc-javagrande@epcc.ed.ac.uk                     *
+ *                                                                         *
+ *                                                                         *
+ *      This version copyright (c) The University of Edinburgh, 2001.      *
+ *                         All rights reserved.                            *
+ *                                                                         *
+ **************************************************************************/
 
 class LinpackRunner extends Thread {
   int id,lda,n,info,ipvt[];
   double a[][];
-  Barrier br;
   int nthreads;
 
-  public LinpackRunner(int id, double a[][], int lda, int n, int ipvt[],Barrier br, int nthreads) {
+  public LinpackRunner(int id, double a[][], int lda, int n, int ipvt[], int nthreads) {
     this.id = id;
     this.a=a;
     this.lda=lda;
     this.n=n;
     this.ipvt=ipvt;
-    this.br=br;
     this.nthreads = nthreads;
   }
 
@@ -40,104 +38,114 @@ class LinpackRunner extends Thread {
   }
 
   public void run() {
-      double[] col_k, col_j;
-      double t;
-      int j,k,kp1,l,nm1;
-      int info;
-      int slice,ilow,iupper;
-      // gaussian elimination with partial pivoting
-      info = 0;
-      int nlocal;
-      Barrier tmpbr;
-      int lid;
-      atomic {
-         nlocal=n;
-         tmpbr=br;
-         lid=id;
+    Barrier barr;
+    barr = new Barrier("128.195.175.84");
+    double[] col_k, col_j;
+    double t;
+    int j,k,kp1,l,nm1;
+    int info;
+    int slice,ilow,iupper;
+    // gaussian elimination with partial pivoting
+    info = 0;
+    int nlocal;
+    int lid;
+    atomic {
+     //System.printString("Atomic #1\t");
+      nlocal=n;
+      lid=id;
+    }
+
+
+    nm1 = nlocal - 1;
+    if (nm1 >=  0) {
+      //System.printString("nm1 = " +nm1+ "\n");
+      for (k = 0; k < nm1; k++) {
+        atomic {
+          //System.printString("Atomic #2\t");
+          col_k = a[k];
+          kp1 = k + 1;
+          // find l = pivot index
+          l = idamax(nlocal-k,col_k,k,1) + k;
+          if(lid==0) {
+            ipvt[k] = l;
+          }
+        }
+        // synchronise threads
+        Barrier.enterBarrier(barr);
+
+        // zero pivot implies this column already triangularized
+        boolean b;
+        atomic {
+          //System.printString("Atomic #3\t");
+          b=col_k[l]!=0;
+        }
+        if (b) {
+          Barrier.enterBarrier(barr);
+          // interchange if necessary
+          if(lid == 0 ) {
+            if (l != k) {
+              atomic {
+                t = col_k[l];
+                col_k[l] = col_k[k];
+                col_k[k] = t;
+              }
+            }
+          }
+          // synchronise threads
+          Barrier.enterBarrier(barr);
+          // compute multipliers
+          // t = -1.0/col_k[k];
+          if(lid == 0) {
+            atomic {
+              t = -1.0/col_k[k];
+              dscal(nlocal-(kp1),t,col_k,kp1,1);
+            }
+          }
+
+          // synchronise threads
+          Barrier.enterBarrier(barr);
+
+          // row elimination with column indexing
+          atomic {
+            //System.printString("Atomic #4\t");
+            slice = ((nlocal-kp1) + nthreads-1)/nthreads;
+            ilow = (lid*slice)+kp1;
+            iupper = ((lid+1)*slice)+kp1;
+            if (iupper > nlocal ) iupper=nlocal;
+            if (ilow > nlocal ) ilow=nlocal;
+            //System.printString("ilow= " + ilow + " iupper= " + iupper + "\n");
+            for (j = ilow; j < iupper; j++) {
+              col_j = a[j];
+              t = col_j[l];
+              if (l != k) {
+                col_j[l] = col_j[k];
+                col_j[k] = t;
+              }
+              daxpy(nlocal-(kp1),t,col_k,kp1,1,
+                  col_j,kp1,1);
+            }
+          }
+
+          // synchronise threads
+          Barrier.enterBarrier(barr);
+        } else {
+          info = k;
+        }
+        Barrier.enterBarrier(barr);
       }
-      
-      nm1 = nlocal - 1;
-      if (nm1 >=  0) {
-         for (k = 0; k < nm1; k++) {
-             atomic {
-                 col_k = a[k];
-                 kp1 = k + 1;
-                 // find l = pivot index
-                 l = idamax(nlocal-k,col_k,k,1) + k;
-                 if(lid==0) {
-                     ipvt[k] = l;
-                 }
-             }
-             // synchronise threads
-             Barrier.enterBarrier(tmpbr);
-             System.clearPrefetchCache();
-             
-             // zero pivot implies this column already triangularized
-             boolean b;
-             atomic {
-                 b=col_k[l]!=0;
-             }
-             if (b) {
-                 Barrier.enterBarrier(tmpbr);
-                 System.clearPrefetchCache();
-                 // interchange if necessary
-                 atomic {
-                     if(lid == 0 ) {
-                         if (l != k) {
-                             t = col_k[l];
-                             col_k[l] = col_k[k];
-                             col_k[k] = t;
-                         }
-                     }
-                 }
-                 // synchronise threads
-                 Barrier.enterBarrier(tmpbr);
-                 System.clearPrefetchCache();
-                 // compute multipliers
-                 atomic {
-                     t = -1.0/col_k[k];
-                     if(lid == 0) {
-                         dscal(nlocal-(kp1),t,col_k,kp1,1);
-                     }
-                 }
-                 // synchronise threads
-                 Barrier.enterBarrier(tmpbr);
-                 System.clearPrefetchCache();
-                 // row elimination with column indexing
-                 atomic {
-                     slice = ((nlocal-kp1) + nthreads-1)/nthreads;
-                     ilow = (lid*slice)+kp1;
-                     iupper = ((lid+1)*slice)+kp1;
-                     if (iupper > nlocal ) iupper=nlocal;
-                     if (ilow > nlocal ) ilow=nlocal;
-                     for (j = ilow; j < iupper; j++) {
-                         col_j = a[j];
-                         t = col_j[l];
-                         if (l != k) {
-                             col_j[l] = col_j[k];
-                             col_j[k] = t;
-                         }
-                         daxpy(nlocal-(kp1),t,col_k,kp1,1,
-                               col_j,kp1,1);
-                     }
-                 }
-                 // synchronise threads
-                 Barrier.enterBarrier(tmpbr);
-                 System.clearPrefetchCache();
-             } else {
-                 info = k;
-             }
-             Barrier.enterBarrier(tmpbr);
-             System.clearPrefetchCache();
-         }
+    }
+
+    //atomic {
+      //System.printString("Atomic #5\t");
+      if(lid==0) {
+        atomic {
+        ipvt[nlocal-1] = nlocal-1;
+        }
       }
-      
       atomic {
-         if(lid==0) {
-             ipvt[nlocal-1] = nlocal-1;
-         }
-         if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1;
+      if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1;
       }
+    //}
   }
 
   /*
@@ -230,4 +238,3 @@ class LinpackRunner extends Thread {
     }
   }
 }
-
index 3ad1cc01040d5f4718b649ed3f00dce03a782a48..100f37e2f661df8b17f3e7da0fe31223bc0c5549 100644 (file)
@@ -3,21 +3,20 @@ SRC=${MAINCLASS}.java \
 JGFLUFactBench.java \
 JGFInstrumentor.java \
 JGFTimer.java \
-Barrier.java \
 LinpackRunner.java
-FLAGS=-dsm -prefetch -nooptimize -debug -profile -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -trueprob 0.95
-FLAGS2=-dsm -nooptimize -debug -profile -mainclass ${MAINCLASS}
+FLAGS=-dsm -prefetch -dsmcaching -optimize -excprefetch JGFLUFactBench.JGFLUFactBench -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -trueprob 0.90
+FLAGS2=-dsm -optimize -mainclass ${MAINCLASS}
 
 default:
 #../../../../buildscript ${FLAGS2} ${SRC}
 #      ../../../../buildscript ${FLAGS} ${SRC}
-       ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}1NP ${SRC}
+       ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}1NPNC ${SRC}
        ../../../../buildscript ${FLAGS} -o ${MAINCLASS}1  ${SRC}
-       cp ${MAINCLASS}1NP.bin ${MAINCLASS}2NP.bin
+       cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}2NPNC.bin
        cp ${MAINCLASS}1.bin ${MAINCLASS}2.bin
-       cp ${MAINCLASS}1NP.bin ${MAINCLASS}3NP.bin
+       cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}3NPNC.bin
        cp ${MAINCLASS}1.bin ${MAINCLASS}3.bin
-       cp ${MAINCLASS}1NP.bin ${MAINCLASS}4NP.bin
+       cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}4NPNC.bin
        cp ${MAINCLASS}1.bin ${MAINCLASS}4.bin
 
 clean: