1 /**************************************************************************
3 * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
7 * Java Grande Benchmarking Project *
11 * Edinburgh Parallel Computing Centre *
13 * email: epcc-javagrande@epcc.ed.ac.uk *
16 * This version copyright (c) The University of Edinburgh, 2001. *
17 * All rights reserved. *
19 **************************************************************************/
20 public class JGFLUFactBench {
23 private int[] datasizes;
24 public int cachelinesize;
28 double ops,total,norma,normx;
31 int n,i,ntimes,info,lda,ldaa,kflops;
34 public JGFLUFactBench(int nthreads) {
35 this.nthreads=nthreads;
36 datasizes = global new int[3];
43 public void JGFsetsize(int size) {
47 public void JGFinitialise() {
52 a = global new double[ldaa][lda];
53 b = global new double [ldaa];
54 x = global new double [ldaa];
55 ipvt = global new int [ldaa];
57 long nl = (long) n; //avoid integer overflow
58 ops = (2.0*(nl*nl*nl))/3.0 + 2.0*(nl*nl);
59 norma = matgen(a,lda,n,b);
62 public static void JGFkernel(JGFLUFactBench lub) {
65 numthreads = lub.nthreads;
69 LinpackRunner[] thobjects;
72 thobjects = global new LinpackRunner[numthreads];
73 br = global new Barrier(numthreads);
76 //JGFInstrumentor.startTimer("Section2:LUFact:Kernel", instr.timers);
78 int[] mid = new int[4];
79 mid[0] = (128<<24)|(195<<16)|(175<<8)|73;
80 mid[1] = (128<<24)|(195<<16)|(175<<8)|69;
81 mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
82 mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
83 for(int i=1;i<numthreads;i++) {
85 thobjects[i] = global new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
92 thobjects[0] = global new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
98 for(int i=1;i<numthreads;i++) {
106 lub.dgesl(lub.a,lub.lda,lub.n,lub.ipvt,lub.b,0);
109 //JGFInstrumentor.stopTimer("Section2:LUFact:Kernel", instr.timers);
112 public int JGFvalidate() {
122 for (i = 0; i < n; i++) {
125 norma = matgen(a,lda,n,b);
126 for (i = 0; i < n; i++) {
130 dmxpy(n,b,n,lda,x,a);
133 for (i = 0; i < n; i++) {
134 //resid = (resid > abs(b[i])) ? resid : abs(b[i]);
135 //normx = (normx > abs(x[i])) ? normx : abs(x[i]);
136 if (resid <= abs(b[i])) resid = abs(b[i]);
137 if (normx <= abs(x[i])) normx = abs(x[i]);
139 eps = epslon((double)1.0);
140 residn = resid/( n*norma*normx*eps );
142 /*******************Compare longs ***********/
144 lresidn = (long) residn * 1000000;
145 lref = (long) ref[size] * 1000000;
147 if (lresidn > lref) {
148 //System.printString("Validation failed");
149 //System.printString("Computed Norm Res = " + (long) residn * 1000000);
150 //System.printString("Reference Norm Res = " + (long) ref[size] * 1000000);
157 double abs (double d) {
158 if (d >= 0) return d;
162 double matgen (double a[][], int lda, int n, double b[])
169 /* Next two for() statements switched. Solver wants
170 matrix in column order. --dmd 3/3/97
172 for (i = 0; i < n; i++) {
173 for (j = 0; j < n; j++) {
174 init = 3125*init % 65536;
175 a[j][i] = (init - 32768.0)/16384.0;
176 if (a[j][i] > norma) {
181 for (i = 0; i < n; i++) {
184 for (j = 0; j < n; j++) {
185 for (i = 0; i < n; i++) {
192 void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
199 // job = 0 , solve a * x = b. first solve l*y = b
201 for (k = 0; k < nm1; k++) {
209 daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
213 for (kb = 0; kb < n; kb++) {
217 daxpy(k,t,a[k],0,1,b,0,1);
220 // job = nonzero, solve trans(a) * x = b. first solve trans(u)*y = b
221 for (k = 0; k < n; k++) {
222 t = ddot(k,a[k],0,1,b,0,1);
223 b[k] = (b[k] - t)/a[k][k];
225 // now solve trans(l)*x = y
227 for (kb = 1; kb < nm1; kb++) {
230 b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
243 constant times a vector plus a vector.
244 jack dongarra, linpack, 3/11/78.
246 void daxpy( int n, double da, double dx[], int dx_off, int incx,
247 double dy[], int dy_off, int incy)
251 if ((n > 0) && (da != 0)) {
252 if (incx != 1 || incy != 1) {
253 // code for unequal increments or equal increments not equal to 1
256 if (incx < 0) ix = (-n+1)*incx;
257 if (incy < 0) iy = (-n+1)*incy;
258 for (i = 0;i < n; i++) {
259 dy[iy +dy_off] += da*dx[ix +dx_off];
265 // code for both increments equal to 1
266 for (i=0; i < n; i++)
267 dy[i +dy_off] += da*dx[i +dx_off];
273 forms the dot product of two vectors.
274 jack dongarra, linpack, 3/11/78.
276 double ddot( int n, double dx[], int dx_off, int incx, double dy[],
277 int dy_off, int incy)
283 if (incx != 1 || incy != 1) {
284 // code for unequal increments or equal increments not equal to 1
287 if (incx < 0) ix = (-n+1)*incx;
288 if (incy < 0) iy = (-n+1)*incy;
289 for (i = 0;i < n; i++) {
290 dtemp += dx[ix +dx_off]*dy[iy +dy_off];
295 // code for both increments equal to 1
297 dtemp += dx[i +dx_off]*dy[i +dy_off];
304 scales a vector by a constant.
305 jack dongarra, linpack, 3/11/78.
307 void dscal( int n, double da, double dx[], int dx_off, int incx)
312 // code for increment not equal to 1
314 for (i = 0; i < nincx; i += incx)
317 // code for increment equal to 1
318 for (i = 0; i < n; i++)
325 finds the index of element having max. absolute value.
326 jack dongarra, linpack, 3/11/78.
328 int idamax( int n, double dx[], int dx_off, int incx)
337 } else if (incx != 1) {
338 // code for increment not equal to 1
339 dmax = abs(dx[0 +dx_off]);
341 for (i = 1; i < n; i++) {
342 dtemp = abs(dx[ix + dx_off]);
350 // code for increment equal to 1
352 dmax = abs(dx[0 +dx_off]);
353 for (i = 1; i < n; i++) {
354 dtemp = abs(dx[i + dx_off]);
365 estimate unit roundoff in quantities of size x.
366 this program should function properly on all systems
367 satisfying the following two assumptions,
368 1. the base used in representing dfloating point
369 numbers is not a power of three.
370 2. the quantity a in statement 10 is represented to
371 the accuracy used in dfloating point variables
372 that are stored in memory.
373 the statement number 10 and the go to 10 are intended to
374 force optimizing compilers to generate code satisfying
376 under these assumptions, it should be true that,
377 a is not exactly equal to four-thirds,
378 b has a zero for its last bit or digit,
379 c is not exactly equal to one,
380 eps measures the separation of 1.0 from
381 the next larger dfloating point number.
382 the developers of eispack would appreciate being informed
383 about any systems where these assumptions do not hold.
385 *****************************************************************
386 this routine is one of the auxiliary routines used by eispack iii
387 to avoid machine dependencies.
388 *****************************************************************
390 this version dated 4/6/83.
392 double epslon (double x)
406 void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
409 // cleanup odd vector
410 for (j = 0; j < n2; j++) {
411 for (i = 0; i < n1; i++) {
412 y[i] += x[j]*m[j][i];