Robust/src/Benchmarks/Prefetch/LUFact/java/JGFLUFactBench.java

   1 /**************************************************************************
   2  *                                                                         *
   3  *         Java Grande Forum Benchmark Suite - Thread Version 1.0          *
   4  *                                                                         *
   5  *                            produced by                                  *
   6  *                                                                         *
   7  *                  Java Grande Benchmarking Project                       *
   8  *                                                                         *
   9  *                                at                                       *
  10  *                                                                         *
  11  *                Edinburgh Parallel Computing Centre                      *
  12  *                                                                         *
  13  *                email: epcc-javagrande@epcc.ed.ac.uk                     *
  14  *                                                                         *
  15  *                                                                         *
  16  *      This version copyright (c) The University of Edinburgh, 2001.      *
  17  *                         All rights reserved.                            *
  18  *                                                                         *
  19  **************************************************************************/
  20 public class JGFLUFactBench {
  21   public int nthreads;
  22   private int size;
  23   private int[] datasizes;
  24   public int cachelinesize;
  25   public JGFInstrumentor instr;
  26   double a[][];
  27   double b[];
  28   double x[];
  29   double ops,total,norma,normx;
  30   double resid,time;
  31   double kf;
  32   int n,i,ntimes,info,lda,ldaa,kflops;
  33   int ipvt[];
  34
  35   public JGFLUFactBench(int nthreads, JGFInstrumentor instr) {
  36     this.nthreads=nthreads;
  37     this.instr = instr;
  38     datasizes = new int[3];
  39     datasizes[0] = 500;
  40     datasizes[1] = 1000;
  41     datasizes[2] = 2000;
  42     cachelinesize = 128;
  43   }
  44
  45   public void JGFsetsize(int size) {
  46     this.size = size;
  47   }
  48
  49   public void JGFinitialise() {
  50     n = datasizes[size];
  51     ldaa = n;
  52     lda = ldaa + 1;
  53
  54     a = new double[ldaa][lda];
  55     b = new double [ldaa];
  56     x = new double [ldaa];
  57     ipvt = new int [ldaa];
  58
  59     long nl = (long) n;   //avoid integer overflow
  60     ops = (2.0*(nl*nl*nl))/3.0 + 2.0*(nl*nl);
  61     norma = matgen(a,lda,n,b);
  62   }
  63
  64   public static void JGFkernel(JGFLUFactBench lub, JGFInstrumentor instr) {
  65     int numthreads;
  66     numthreads = lub.nthreads;
  67
  68     /* spawn threads */
  69     LinpackRunner[] thobjects;
  70     TournamentBarrier br;
  71     thobjects = new LinpackRunner[numthreads];
  72     br = new TournamentBarrier(numthreads);
  73
  74     instr.startTimer("Section2:LUFact:Kernel", instr.timers);
  75
  76     LinpackRunner tmp;
  77     int mid = (128<<24)|(195<<16)|(175<<8)|73;
  78     for(int i=1;i<numthreads;i++) {
  79       thobjects[i] = new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
  80       tmp = thobjects[i];
  81       tmp.start();
  82     }
  83
  84     thobjects[0] = new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
  85     tmp = thobjects[0];
  86     tmp.start();
  87     try {
  88       tmp.join();
  89     } catch (InterruptedException e) {}
  90
  91     for(int i=1;i<numthreads;i++) {
  92       tmp = thobjects[i];
  93       try {
  94         tmp.join();
  95       } catch (InterruptedException e) {}
  96     }
  97
  98     lub.dgesl(lub.a,lub.lda,lub.n,lub.ipvt,lub.b,0);
  99     instr.stopTimer("Section2:LUFact:Kernel", instr.timers);
 100   }
 101
 102   public void JGFvalidate() {
 103     int i;
 104     double eps,residn;
 105     double[] ref;
 106
 107     ref = new double[3];
 108     ref[0] = 6.0;
 109     ref[1] = 12.0;
 110     ref[2] = 20.0;
 111
 112     for (i = 0; i < n; i++) {
 113       x[i] = b[i];
 114     }
 115     norma = matgen(a,lda,n,b);
 116     for (i = 0; i < n; i++) {
 117       b[i] = -(b[i]);
 118     }
 119
 120     dmxpy(n,b,n,lda,x,a);
 121     resid = 0.0;
 122     normx = 0.0;
 123     for (i = 0; i < n; i++) {
 124       //resid = (resid > abs(b[i])) ? resid : abs(b[i]);
 125       //normx = (normx > abs(x[i])) ? normx : abs(x[i]);
 126       if (resid <= abs(b[i])) resid = abs(b[i]);
 127       if (normx <= abs(x[i])) normx = abs(x[i]);
 128     }
 129     eps =  epslon((double)1.0);
 130     residn = resid/( n*norma*normx*eps );
 131
 132     if (residn > ref[size]) {
 133       System.out.println("Validation failed");
 134       System.out.println("Computed Norm Res = " + (long) residn * 1000000);
 135       System.out.println("Reference Norm Res = " + (long) ref[size] * 1000000);
 136     }
 137   }
 138
 139   double abs (double d) {
 140     if (d >= 0) return d;
 141     else return -d;
 142     //return (d >= 0) ? d : -d;
 143   }
 144
 145   double matgen (double a[][], int lda, int n, double b[])
 146   {
 147     double norma;
 148     int init, i, j;
 149
 150     init = 1325;
 151     norma = 0.0;
 152     /*  Next two for() statements switched.  Solver wants
 153         matrix in column order. --dmd 3/3/97
 154         */
 155     for (i = 0; i < n; i++) {
 156       for (j = 0; j < n; j++) {
 157         init = 3125*init % 65536;
 158         a[j][i] = (init - 32768.0)/16384.0;
 159         //norma = (a[j][i] > norma) ? a[j][i] : norma;
 160         if (a[j][i] > norma) {
 161           norma = a[j][i];
 162         }
 163       }
 164     }
 165     for (i = 0; i < n; i++) {
 166       b[i] = 0.0;
 167     }
 168     for (j = 0; j < n; j++) {
 169       for (i = 0; i < n; i++) {
 170         b[i] += a[j][i];
 171       }
 172     }
 173     return norma;
 174   }
 175
 176   void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
 177   {
 178     double t;
 179     int k,kb,l,nm1,kp1;
 180
 181     nm1 = n - 1;
 182     if (job == 0) {
 183       // job = 0 , solve  a * x = b.  first solve  l*y = b
 184       if (nm1 >= 1) {
 185         for (k = 0; k < nm1; k++) {
 186           l = ipvt[k];
 187           t = b[l];
 188           if (l != k){
 189             b[l] = b[k];
 190             b[k] = t;
 191           }
 192           kp1 = k + 1;
 193           daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
 194         }
 195       }
 196       // now solve  u*x = y
 197       for (kb = 0; kb < n; kb++) {
 198         k = n - (kb + 1);
 199         b[k] /= a[k][k];
 200         t = -b[k];
 201         daxpy(k,t,a[k],0,1,b,0,1);
 202       }
 203     } else {
 204       // job = nonzero, solve  trans(a) * x = b.  first solve  trans(u)*y = b
 205       for (k = 0; k < n; k++) {
 206         t = ddot(k,a[k],0,1,b,0,1);
 207         b[k] = (b[k] - t)/a[k][k];
 208       }
 209       // now solve trans(l)*x = y
 210       if (nm1 >= 1) {
 211         for (kb = 1; kb < nm1; kb++) {
 212           k = n - (kb+1);
 213           kp1 = k + 1;
 214           b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
 215           l = ipvt[k];
 216           if (l != k) {
 217             t = b[l];
 218             b[l] = b[k];
 219             b[k] = t;
 220           }
 221         }
 222       }
 223     }
 224   }
 225
 226   /*
 227      constant times a vector plus a vector.
 228      jack dongarra, linpack, 3/11/78.
 229      */
 230   void daxpy( int n, double da, double dx[], int dx_off, int incx,
 231       double dy[], int dy_off, int incy)
 232   {
 233     int i,ix,iy;
 234
 235     if ((n > 0) && (da != 0)) {
 236       if (incx != 1 || incy != 1) {
 237         // code for unequal increments or equal increments not equal to 1
 238         ix = 0;
 239         iy = 0;
 240         if (incx < 0) ix = (-n+1)*incx;
 241         if (incy < 0) iy = (-n+1)*incy;
 242         for (i = 0;i < n; i++) {
 243           dy[iy +dy_off] += da*dx[ix +dx_off];
 244           ix += incx;
 245           iy += incy;
 246         }
 247         return;
 248       } else {
 249         // code for both increments equal to 1
 250         for (i=0; i < n; i++)
 251           dy[i +dy_off] += da*dx[i +dx_off];
 252       }
 253     }
 254   }
 255
 256   /*
 257      forms the dot product of two vectors.
 258      jack dongarra, linpack, 3/11/78.
 259      */
 260   double ddot( int n, double dx[], int dx_off, int incx, double dy[],
 261       int dy_off, int incy)
 262   {
 263     double dtemp;
 264     int i,ix,iy;
 265     dtemp = 0;
 266     if (n > 0) {
 267       if (incx != 1 || incy != 1) {
 268         // code for unequal increments or equal increments not equal to 1
 269         ix = 0;
 270         iy = 0;
 271         if (incx < 0) ix = (-n+1)*incx;
 272         if (incy < 0) iy = (-n+1)*incy;
 273         for (i = 0;i < n; i++) {
 274           dtemp += dx[ix +dx_off]*dy[iy +dy_off];
 275           ix += incx;
 276           iy += incy;
 277         }
 278       } else {
 279         // code for both increments equal to 1
 280         for (i=0;i < n; i++)
 281           dtemp += dx[i +dx_off]*dy[i +dy_off];
 282       }
 283     }
 284     return(dtemp);
 285   }
 286
 287   /*
 288      scales a vector by a constant.
 289      jack dongarra, linpack, 3/11/78.
 290      */
 291   void dscal( int n, double da, double dx[], int dx_off, int incx)
 292   {
 293     int i,nincx;
 294     if (n > 0) {
 295       if (incx != 1) {
 296         // code for increment not equal to 1
 297         nincx = n*incx;
 298         for (i = 0; i < nincx; i += incx)
 299           dx[i +dx_off] *= da;
 300       } else {
 301         // code for increment equal to 1
 302         for (i = 0; i < n; i++)
 303           dx[i +dx_off] *= da;
 304       }
 305     }
 306   }
 307
 308   /*
 309      finds the index of element having max. absolute value.
 310      jack dongarra, linpack, 3/11/78.
 311      */
 312   int idamax( int n, double dx[], int dx_off, int incx)
 313   {
 314     double dmax, dtemp;
 315     int i, ix, itemp=0;
 316
 317     if (n < 1) {
 318       itemp = -1;
 319     } else if (n ==1) {
 320       itemp = 0;
 321     } else if (incx != 1) {
 322       // code for increment not equal to 1
 323       dmax = abs(dx[0 +dx_off]);
 324       ix = 1 + incx;
 325       for (i = 1; i < n; i++) {
 326         dtemp = abs(dx[ix + dx_off]);
 327         if (dtemp > dmax)  {
 328           itemp = i;
 329           dmax = dtemp;
 330         }
 331         ix += incx;
 332       }
 333     } else {
 334       // code for increment equal to 1
 335       itemp = 0;
 336       dmax = abs(dx[0 +dx_off]);
 337       for (i = 1; i < n; i++) {
 338         dtemp = abs(dx[i + dx_off]);
 339         if (dtemp > dmax) {
 340           itemp = i;
 341           dmax = dtemp;
 342         }
 343       }
 344     }
 345     return (itemp);
 346   }
 347
 348   /*
 349      estimate unit roundoff in quantities of size x.
 350      this program should function properly on all systems
 351      satisfying the following two assumptions,
 352      1.  the base used in representing dfloating point
 353      numbers is not a power of three.
 354      2.  the quantity  a  in statement 10 is represented to
 355      the accuracy used in dfloating point variables
 356      that are stored in memory.
 357      the statement number 10 and the go to 10 are intended to
 358      force optimizing compilers to generate code satisfying
 359      assumption 2.
 360      under these assumptions, it should be true that,
 361      a  is not exactly equal to four-thirds,
 362      b  has a zero for its last bit or digit,
 363      c  is not exactly equal to one,
 364      eps  measures the separation of 1.0 from
 365      the next larger dfloating point number.
 366      the developers of eispack would appreciate being informed
 367      about any systems where these assumptions do not hold.
 368
 369    *****************************************************************
 370    this routine is one of the auxiliary routines used by eispack iii
 371    to avoid machine dependencies.
 372    *****************************************************************
 373
 374    this version dated 4/6/83.
 375    */
 376   double epslon (double x)
 377   {
 378     double a,b,c,eps;
 379
 380     a = 4.0e0/3.0e0;
 381     eps = 0;
 382     while (eps == 0) {
 383       b = a - 1.0;
 384       c = b + b + b;
 385       eps = abs(c-1.0);
 386     }
 387     return(eps*abs(x));
 388   }
 389
 390   void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
 391   {
 392     int j,i;
 393     // cleanup odd vector
 394     for (j = 0; j < n2; j++) {
 395       for (i = 0; i < n1; i++) {
 396         y[i] += x[j]*m[j][i];
 397       }
 398     }
 399   }
 400   /*
 401      public static void JGFvalidate(JGFLUFactBench lub) {
 402      int i;
 403      double eps,residn;
 404      double[] ref;
 405
 406      ref = new double[3];
 407      ref[0] = 6.0;
 408      ref[1] = 12.0;
 409      ref[2] = 20.0;
 410
 411      atomic {
 412      for (i = 0; i < lub.n; i++) {
 413      lub.x[i] = lub.b[i];
 414      }
 415      lub.norma = lub.matgen(lub.a,lub.lda,lub.n,lub.b);
 416      for (i = 0; i < lub.n; i++) {
 417      lub.b[i] = -(lub.b[i]);
 418      }
 419
 420      lub.dmxpy(lub.n,lub.b,lub.n,lub.lda,lub.x,lub.a);
 421      lub.resid = 0.0;
 422      lub.normx = 0.0;
 423      for (i = 0; i < lub.n; i++) {
 424 //resid = (resid > abs(b[i])) ? resid : abs(b[i]);
 425 //normx = (normx > abs(x[i])) ? normx : abs(x[i]);
 426 if (lub.resid <= abs(lub.b[i])) lub.resid = lub.abs(lub.b[i]);
 427 if (lub.normx <= abs(lub.x[i])) lub.normx = lub.abs(lub.x[i]);
 428 }
 429 eps =  lub.epslon((double)1.0);
 430 residn = lub.resid/( lub.n*lub.norma*lub.normx*eps );
 431 }
 432
 433 if (residn > ref[size]) {
 434 System.out.println("Validation failed");
 435 System.out.println("Computed Norm Res = " + (long) residn);
 436 System.out.println("Reference Norm Res = " + (long) ref[size]);
 437 }
 438 }
 439 */
 440
 441 }