Add correct atomic blocks to benchmark
author bdemsky <bdemsky>
Sun, 27 Apr 2008 07:12:31 +0000 (07:12 +0000)
committer bdemsky <bdemsky>
Sun, 27 Apr 2008 07:12:31 +0000 (07:12 +0000)
Robust/src/Benchmarks/Prefetch/LUFact/dsm/Barrier.java [new file with mode: 0644]
Robust/src/Benchmarks/Prefetch/LUFact/dsm/JGFLUFactBench.java
Robust/src/Benchmarks/Prefetch/LUFact/dsm/LinpackRunner.java
Robust/src/Benchmarks/Prefetch/LUFact/dsm/makefile

diff --git a/Robust/src/Benchmarks/Prefetch/LUFact/dsm/Barrier.java b/Robust/src/Benchmarks/Prefetch/LUFact/dsm/Barrier.java
new file mode 100644 (file)
index 0000000..482b695
--- /dev/null
@@ -0,0 +1,52 @@
+public class Barrier { // Reusable counting barrier for numthreads participants; enterBarrier implements a two-phase (arrive, then drain) protocol
+  int numthreads; // number of callers that must arrive before the barrier opens
+  int entercount; // callers currently counted as inside the barrier
+  boolean cleared; // true while the barrier is open and waiting callers are still leaving
+
+  public Barrier(int n) { // creates a closed, empty barrier that opens after n arrivals
+    numthreads=n;
+    cleared = false;
+    entercount = 0;
+  }
+
+  public Barrier() { // no-argument constructor; assigns no fields
+  }
+
+  public void reset() { // puts the barrier back in the closed, empty state; numthreads is left unchanged
+    cleared = false;
+    entercount = 0;
+  }
+
+  public static void enterBarrier(Barrier b) { // spins until the b.numthreads-th caller of the current round has arrived at b
+    int tmp;
+    boolean retry=true;
+
+    do { // phase 1: record this caller's arrival; retries while b.cleared is still true from the previous round
+      atomic {
+        if (!b.cleared) {
+          b.entercount++;
+          tmp = b.entercount;
+          if (tmp==b.numthreads) { // this caller is the last one to arrive
+            if(b.numthreads > 1) // with one participant nobody is waiting, so the barrier is never opened
+              b.cleared=true;
+            b.entercount--; // the last arrival removes itself from the count and leaves without waiting
+            return;
+          }
+          retry=false; // arrival recorded; fall through to phase 2
+        }
+      }
+    } while(retry);
+
+    while(true) { // phase 2: spin until the last arrival has set b.cleared
+      atomic {
+        if (b.cleared) {
+          b.entercount--;
+          int count = b.entercount;
+          if (count==0) // the last waiter to leave closes the barrier again so the next round can start
+            b.cleared=false;
+          return;
+        }
+      }
+    }
+  }
+}
index 3dea7eb69ce1ec0f353e4b86a1c57f628d1d739b..b791cd819f851c0279476e8786faaf4eaae07e80 100644 (file)
@@ -67,10 +67,10 @@ public class JGFLUFactBench {
 
     /* spawn threads */
     LinpackRunner[] thobjects;
-    TournamentBarrier br;
+    Barrier br;
     atomic {
       thobjects = global new LinpackRunner[numthreads];
-      br = global new TournamentBarrier(numthreads);
+      br = global new Barrier(numthreads);
     }
 
     //JGFInstrumentor.startTimer("Section2:LUFact:Kernel", instr.timers);  
index 73b2a1ef57057d70de5eab3918b34bc4641571b6..e19fa177c2e2d001c59bdcc24279226c6f44c75a 100644 (file)
 class LinpackRunner extends Thread {
   int id,lda,n,info,ipvt[];
   double a[][];
-  TournamentBarrier br;
+  Barrier br;
   int nthreads;
 
-  public LinpackRunner(int id, double a[][], int lda, int n, int ipvt[],TournamentBarrier br, int nthreads) {
+  public LinpackRunner(int id, double a[][], int lda, int n, int ipvt[],Barrier br, int nthreads) {
     this.id = id;
     this.a=a;
     this.lda=lda;
@@ -40,76 +40,97 @@ class LinpackRunner extends Thread {
   }
 
   public void run() {
-    double[] col_k, col_j;
-    double t;
-    int j,k,kp1,l,nm1;
-    int info;
-    int slice,ilow,iupper;
-    // gaussian elimination with partial pivoting
-    atomic {
+      double[] col_k, col_j;
+      double t;
+      int j,k,kp1,l,nm1;
+      int info; // NOTE(review): this local shadows the info field of the class, so values assigned below are never stored in the field
+      int slice,ilow,iupper;
+      // gaussian elimination with partial pivoting
       info = 0;
-      nm1 = n - 1;
+      int nlocal;
+      Barrier tmpbr;
+      int lid;
+      atomic { // copy the fields n, br and id into locals so the loop bound and the barrier calls below can use them outside atomic blocks
+         nlocal=n;
+         tmpbr=br;
+         lid=id;
+      }
+      
+      nm1 = nlocal - 1;
       if (nm1 >=  0) {
-        for (k = 0; k < nm1; k++) {
-          col_k = a[k];
-          kp1 = k + 1;
-          // find l = pivot index
-          l = idamax(n-k,col_k,k,1) + k;
-          if(id==0) {
-            ipvt[k] = l;
-          }
-          // synchronise threads
-          br.DoBarrier(id);
-          // zero pivot implies this column already triangularized
-          if (col_k[l] != 0) {
-            br.DoBarrier(id);
-            // interchange if necessary
-            if(id == 0 ) {
-              if (l != k) {
-                t = col_k[l];
-                col_k[l] = col_k[k];
-                col_k[k] = t;
-              }
-            }
-            // synchronise threads
-            br.DoBarrier(id);
-            // compute multipliers
-            t = -1.0/col_k[k];
-            if(id == 0) {
-              dscal(n-(kp1),t,col_k,kp1,1);
-            }
-            // synchronise threads
-            br.DoBarrier(id);
-            // row elimination with column indexing
-            slice = ((n-kp1) + nthreads-1)/nthreads;
-            ilow = (id*slice)+kp1;
-            iupper = ((id+1)*slice)+kp1;
-            if (iupper > n ) iupper=n;
-            if (ilow > n ) ilow=n;
-            for (j = ilow; j < iupper; j++) {
-              col_j = a[j];
-              t = col_j[l];
-              if (l != k) {
-                col_j[l] = col_j[k];
-                col_j[k] = t;
-              }
-              daxpy(n-(kp1),t,col_k,kp1,1,
-                  col_j,kp1,1);
-            }
-            // synchronise threads
-            br.DoBarrier(id);
-          } else {
-            info = k;
-          }
-          br.DoBarrier(id);
-        }
+         for (k = 0; k < nm1; k++) { // one elimination step per column k; each phase below runs in its own atomic block, separated by barriers
+             atomic {
+                 col_k = a[k];
+                 kp1 = k + 1;
+                 // find l = pivot index
+                 l = idamax(nlocal-k,col_k,k,1) + k; // every thread runs the pivot search on column k
+                 if(lid==0) { // only thread 0 records the pivot index
+                     ipvt[k] = l;
+                 }
+             }
+             // synchronise threads
+             Barrier.enterBarrier(tmpbr);
+             // zero pivot implies this column already triangularized
+             boolean b;
+             atomic { // the pivot element is read inside an atomic block; the branch on b happens outside it
+                 b=col_k[l]!=0;
+             }
+             if (b) {
+                 Barrier.enterBarrier(tmpbr);
+                 // interchange if necessary
+                 atomic {
+                     if(lid == 0 ) { // only thread 0 swaps the pivot into position k of column k
+                         if (l != k) {
+                             t = col_k[l];
+                             col_k[l] = col_k[k];
+                             col_k[k] = t;
+                         }
+                     }
+                 }
+                 // synchronise threads
+                 Barrier.enterBarrier(tmpbr);
+                 // compute multipliers
+                 atomic {
+                     t = -1.0/col_k[k]; // computed by every thread, but only used by thread 0 in the dscal call below
+                     if(lid == 0) {
+                         dscal(nlocal-(kp1),t,col_k,kp1,1);
+                     }
+                 }
+                 // synchronise threads
+                 Barrier.enterBarrier(tmpbr);
+                 // row elimination with column indexing
+                 atomic {
+                     slice = ((nlocal-kp1) + nthreads-1)/nthreads; // ceiling of (remaining columns)/nthreads
+                     ilow = (lid*slice)+kp1; // this thread handles columns ilow (inclusive) to iupper (exclusive)
+                     iupper = ((lid+1)*slice)+kp1;
+                     if (iupper > nlocal ) iupper=nlocal; // clamp both bounds to the number of columns
+                     if (ilow > nlocal ) ilow=nlocal;
+                     for (j = ilow; j < iupper; j++) {
+                         col_j = a[j];
+                         t = col_j[l];
+                         if (l != k) { // apply the same l<->k interchange to column j
+                             col_j[l] = col_j[k];
+                             col_j[k] = t;
+                         }
+                         daxpy(nlocal-(kp1),t,col_k,kp1,1,
+                               col_j,kp1,1);
+                     }
+                 }
+                 // synchronise threads
+                 Barrier.enterBarrier(tmpbr);
+             } else {
+                 info = k; // zero pivot: remember the column index (in the local info only)
+             }
+             Barrier.enterBarrier(tmpbr); // reached on both branches, closing every iteration with a barrier
+         }
       }
-
-      if(id==0) {
-        ipvt[n-1] = n-1;
+      
+      atomic {
+         if(lid==0) { // only thread 0 writes the final pivot entry
+             ipvt[nlocal-1] = nlocal-1;
+         }
+         if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1; // NOTE(review): evaluated by every thread; not guarded against nlocal == 0
       }
-      if (a[(n-1)][(n-1)] == 0) info = n-1;
-    }
   }
 
   /*
index 5f73b9f6857b41dcaa4c2c21fc9d87292e6b7908..651ffc88970a445b9db2ced0788790f25e969314 100644 (file)
@@ -3,7 +3,7 @@ SRC=${MAINCLASS}.java \
 JGFLUFactBench.java \
 JGFInstrumentor.java \
 JGFTimer.java \
-TournamentBarrier.java \
+Barrier.java \
 Linpack.java \
 LinpackRunner.java
 FLAGS=-dsm -prefetch -optimize -debug -profile -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -o ${MAINCLASS} -trueprob 0.8