1 /**************************************************************************
3 * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
7 * Java Grande Benchmarking Project *
11 * Edinburgh Parallel Computing Centre *
13 * email: epcc-javagrande@epcc.ed.ac.uk *
16 * This version copyright (c) The University of Edinburgh, 2001. *
17 * All rights reserved. *
19 **************************************************************************/
20 public class JGFLUFactBench {
23 private int[] datasizes;
24 public int cachelinesize;
28 double ops,total,norma,normx;
31 int n,i,ntimes,info,lda,ldaa,kflops;
34 public JGFLUFactBench(int nthreads) {
35 this.nthreads=nthreads;
36 datasizes = global new int[3];
43 public void JGFsetsize(int size) {
47 public void JGFinitialise() {
52 a = global new double[ldaa][lda];
53 //System.printString("row_ldaa = "+ldaa + "column_lda= "+lda+ "\n");
54 b = global new double [ldaa];
55 x = global new double [ldaa];
56 ipvt = global new int [ldaa];
58 long nl = (long) n; //avoid integer overflow
59 ops = (2.0*(nl*nl*nl))/3.0 + 2.0*(nl*nl);
60 norma = matgen(a,lda,n,b);
63 public static void JGFkernel(JGFLUFactBench lub) {
66 numthreads = lub.nthreads;
69 int[] mid = new int[4];
70 mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
71 mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dw-11
72 mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dw-12
73 mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dw-13
76 LinpackRunner[] thobjects;
79 thobjects = global new LinpackRunner[numthreads];
80 mybarr = global new BarrierServer(numthreads);
84 //JGFInstrumentor.startTimer("Section2:LUFact:Kernel", instr.timers);
87 boolean waitfordone=true;
90 //System.printString("HERE #1\n");
96 for(int i=0;i<numthreads;i++) {
98 thobjects[i] = global new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,lub.nthreads);
106 thobjects[0] = global new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,lub.nthreads);
112 for(int i=0;i<numthreads;i++) {
120 //System.printString("HERE #2\n");
121 lub.dgesl(lub.a,lub.lda,lub.n,lub.ipvt,lub.b,0);
124 //JGFInstrumentor.stopTimer("Section2:LUFact:Kernel", instr.timers);
127 public int JGFvalidate() {
137 for (i = 0; i < n; i++) {
140 norma = matgen(a,lda,n,b);
141 for (i = 0; i < n; i++) {
145 dmxpy(n,b,n,lda,x,a);
148 for (i = 0; i < n; i++) {
149 //resid = (resid > abs(b[i])) ? resid : abs(b[i]);
150 //normx = (normx > abs(x[i])) ? normx : abs(x[i]);
151 if (resid <= abs(b[i])) resid = abs(b[i]);
152 if (normx <= abs(x[i])) normx = abs(x[i]);
154 eps = epslon((double)1.0);
155 residn = resid/( n*norma*normx*eps );
157 /*******************Compare longs ***********/
159 lresidn = (long) residn * 1000000;
160 lref = (long) ref[size] * 1000000;
162 if (lresidn > lref) {
163 //System.printString("Validation failed");
164 //System.printString("Computed Norm Res = " + (long) residn * 1000000);
165 //System.printString("Reference Norm Res = " + (long) ref[size] * 1000000);
172 double abs (double d) {
173 if (d >= 0) return d;
177 double matgen (double a[][], int lda, int n, double b[])
184 /* Next two for() statements switched. Solver wants
185 matrix in column order. --dmd 3/3/97
187 for (i = 0; i < n; i++) {
188 for (j = 0; j < n; j++) {
189 init = 3125*init % 65536;
190 a[j][i] = (init - 32768.0)/16384.0;
191 if (a[j][i] > norma) {
196 for (i = 0; i < n; i++) {
199 for (j = 0; j < n; j++) {
200 for (i = 0; i < n; i++) {
207 void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
214 // job = 0 , solve a * x = b. first solve l*y = b
216 for (k = 0; k < nm1; k++) {
224 daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
228 for (kb = 0; kb < n; kb++) {
232 daxpy(k,t,a[k],0,1,b,0,1);
235 // job = nonzero, solve trans(a) * x = b. first solve trans(u)*y = b
236 for (k = 0; k < n; k++) {
237 t = ddot(k,a[k],0,1,b,0,1);
238 b[k] = (b[k] - t)/a[k][k];
240 // now solve trans(l)*x = y
242 for (kb = 1; kb < nm1; kb++) {
245 b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
258 constant times a vector plus a vector.
259 jack dongarra, linpack, 3/11/78.
261 void daxpy( int n, double da, double dx[], int dx_off, int incx,
262 double dy[], int dy_off, int incy)
266 if ((n > 0) && (da != 0)) {
267 if (incx != 1 || incy != 1) {
268 // code for unequal increments or equal increments not equal to 1
271 if (incx < 0) ix = (-n+1)*incx;
272 if (incy < 0) iy = (-n+1)*incy;
273 for (i = 0;i < n; i++) {
274 dy[iy +dy_off] += da*dx[ix +dx_off];
280 // code for both increments equal to 1
281 for (i=0; i < n; i++)
282 dy[i +dy_off] += da*dx[i +dx_off];
288 forms the dot product of two vectors.
289 jack dongarra, linpack, 3/11/78.
291 double ddot( int n, double dx[], int dx_off, int incx, double dy[],
292 int dy_off, int incy)
298 if (incx != 1 || incy != 1) {
299 // code for unequal increments or equal increments not equal to 1
302 if (incx < 0) ix = (-n+1)*incx;
303 if (incy < 0) iy = (-n+1)*incy;
304 for (i = 0;i < n; i++) {
305 dtemp += dx[ix +dx_off]*dy[iy +dy_off];
310 // code for both increments equal to 1
312 dtemp += dx[i +dx_off]*dy[i +dy_off];
319 scales a vector by a constant.
320 jack dongarra, linpack, 3/11/78.
322 void dscal( int n, double da, double dx[], int dx_off, int incx)
327 // code for increment not equal to 1
329 for (i = 0; i < nincx; i += incx)
332 // code for increment equal to 1
333 for (i = 0; i < n; i++)
340 finds the index of element having max. absolute value.
341 jack dongarra, linpack, 3/11/78.
343 int idamax( int n, double dx[], int dx_off, int incx)
352 } else if (incx != 1) {
353 // code for increment not equal to 1
354 dmax = abs(dx[0 +dx_off]);
356 for (i = 1; i < n; i++) {
357 dtemp = abs(dx[ix + dx_off]);
365 // code for increment equal to 1
367 dmax = abs(dx[0 +dx_off]);
368 for (i = 1; i < n; i++) {
369 dtemp = abs(dx[i + dx_off]);
380 estimate unit roundoff in quantities of size x.
381 this program should function properly on all systems
382 satisfying the following two assumptions,
383 1. the base used in representing dfloating point
384 numbers is not a power of three.
385 2. the quantity a in statement 10 is represented to
386 the accuracy used in dfloating point variables
387 that are stored in memory.
388 the statement number 10 and the go to 10 are intended to
389 force optimizing compilers to generate code satisfying
391 under these assumptions, it should be true that,
392 a is not exactly equal to four-thirds,
393 b has a zero for its last bit or digit,
394 c is not exactly equal to one,
395 eps measures the separation of 1.0 from
396 the next larger dfloating point number.
397 the developers of eispack would appreciate being informed
398 about any systems where these assumptions do not hold.
400 *****************************************************************
401 this routine is one of the auxiliary routines used by eispack iii
402 to avoid machine dependencies.
403 *****************************************************************
405 this version dated 4/6/83.
407 double epslon (double x)
421 void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
424 // cleanup odd vector
425 for (j = 0; j < n2; j++) {
426 for (i = 0; i < n1; i++) {
427 y[i] += x[j]*m[j][i];