// Transpose data.
if (start == 0) {
atomic {
- for(int i = 0; i<rowlength; i++) {
- double tRe[] = tempdataRe[i];
- double tIm[] = tempdataIm[i];
- for(int j = 0; j<columnlength; j++) {
- data2.dataRe[j][i] = tRe[j];
- data2.dataIm[j][i] = tIm[j];
- }
- }
+ transpose(tempdataRe,tempdataIm, data2.dataRe,data2.dataIm, rowlength, columnlength);
}
}
}
} //end of run
+ public void transpose(double[][] tempdataRe, double[][] tempdataIm, double[][] outputRe, // Writes the transpose of (tempdataRe, tempdataIm) into (outputRe, outputIm).
+ double[][] outputIm, int rowlength, int columnlength) { // Only input rows 0..rowlength-1 and columns 0..columnlength-1 are read.
+ for(int i = 0; i<rowlength; i++) { // i walks the input rows
+ double tRe[] = tempdataRe[i]; // fetch row i once so the inner loop indexes a 1-D array
+ double tIm[] = tempdataIm[i];
+ for(int j = 0; j<columnlength; j++) { // j walks the columns of input row i
+ outputRe[j][i] = tRe[j]; // input element [i][j] lands at output [j][i]
+ outputIm[j][i] = tIm[j];
+ }
+ }
+ }
+
public static void main(String[] args) {
int NUM_THREADS = 1;
int SIZE = 800;
} // End of function permute().
private static void twiddle(int factorIndex, fft1d myfft, double[] temRe, double[] temIm,
- double[] outputRe, double[] outputIm) {
+ double[] outputRe, double[] outputIm) {
// Get factor data.
int sofarRadix = myfft.sofar[factorIndex];
int radix = myfft.factors[factorIndex];
lda = ldaa + 1;
a = global new double[ldaa][lda];
+ //System.printString("row_ldaa = "+ldaa + "column_lda= "+lda+ "\n");
b = global new double [ldaa];
x = global new double [ldaa];
ipvt = global new int [ldaa];
numthreads = lub.nthreads;
}
+ int[] mid = new int[4];
+ mid[0] = (128<<24)|(195<<16)|(175<<8)|84; //dw-10
+ mid[1] = (128<<24)|(195<<16)|(175<<8)|85; //dw-11
+ mid[2] = (128<<24)|(195<<16)|(175<<8)|86; //dw-12
+ mid[3] = (128<<24)|(195<<16)|(175<<8)|87; //dw-13
+
/* spawn threads */
LinpackRunner[] thobjects;
- Barrier br;
+ BarrierServer mybarr;
atomic {
thobjects = global new LinpackRunner[numthreads];
- br = global new Barrier(numthreads);
+ mybarr = global new BarrierServer(numthreads);
}
+ mybarr.start(mid[0]);
//JGFInstrumentor.startTimer("Section2:LUFact:Kernel", instr.timers);
LinpackRunner tmp;
- int[] mid = new int[4];
- mid[0] = (128<<24)|(195<<16)|(175<<8)|73;
- mid[1] = (128<<24)|(195<<16)|(175<<8)|69;
- mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
- mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
- for(int i=1;i<numthreads;i++) {
+
+ boolean waitfordone=true;
+ while(waitfordone) {
+ atomic {
+ //System.printString("HERE #1\n");
+ if (mybarr.done)
+ waitfordone=false;
+ }
+ }
+
+ for(int i=0;i<numthreads;i++) {
atomic {
- thobjects[i] = global new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
+ thobjects[i] = global new LinpackRunner(i,lub.a,lub.lda,lub.n,lub.ipvt,lub.nthreads);
tmp = thobjects[i];
}
tmp.start(mid[i]);
}
+ /*
atomic {
- thobjects[0] = global new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,br,lub.nthreads);
+ thobjects[0] = global new LinpackRunner(0,lub.a,lub.lda,lub.n,lub.ipvt,lub.nthreads);
tmp = thobjects[0];
}
tmp.start(mid[0]);
tmp.join();
-
- for(int i=1;i<numthreads;i++) {
+*/
+ for(int i=0;i<numthreads;i++) {
atomic {
tmp = thobjects[i];
}
}
atomic {
+ //System.printString("HERE #2\n");
lub.dgesl(lub.a,lub.lda,lub.n,lub.ipvt,lub.b,0);
}
/**************************************************************************
-* *
-* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
-* *
-* produced by *
-* *
-* Java Grande Benchmarking Project *
-* *
-* at *
-* *
-* Edinburgh Parallel Computing Centre *
-* *
-* email: epcc-javagrande@epcc.ed.ac.uk *
-* *
-* *
-* This version copyright (c) The University of Edinburgh, 2001. *
-* All rights reserved. *
-* *
-**************************************************************************/
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 2001. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
class LinpackRunner extends Thread {
int id,lda,n,info,ipvt[];
double a[][];
- Barrier br;
int nthreads;
- public LinpackRunner(int id, double a[][], int lda, int n, int ipvt[],Barrier br, int nthreads) {
+ public LinpackRunner(int id, double a[][], int lda, int n, int ipvt[], int nthreads) { // Copies every argument into the same-named field; no Barrier argument is taken.
this.id = id;
this.a=a;
this.lda=lda;
this.n=n;
this.ipvt=ipvt;
- this.br=br;
this.nthreads = nthreads;
}
}
public void run() {
- double[] col_k, col_j;
- double t;
- int j,k,kp1,l,nm1;
- int info;
- int slice,ilow,iupper;
- // gaussian elimination with partial pivoting
- info = 0;
- int nlocal;
- Barrier tmpbr;
- int lid;
- atomic {
- nlocal=n;
- tmpbr=br;
- lid=id;
+ Barrier barr;
+ barr = new Barrier("128.195.175.84");
+ double[] col_k, col_j;
+ double t;
+ int j,k,kp1,l,nm1;
+ int info;
+ int slice,ilow,iupper;
+ // gaussian elimination with partial pivoting
+ info = 0;
+ int nlocal;
+ int lid;
+ atomic {
+ //System.printString("Atomic #1\t");
+ nlocal=n;
+ lid=id;
+ }
+
+
+ nm1 = nlocal - 1;
+ if (nm1 >= 0) {
+ //System.printString("nm1 = " +nm1+ "\n");
+ for (k = 0; k < nm1; k++) {
+ atomic {
+ //System.printString("Atomic #2\t");
+ col_k = a[k];
+ kp1 = k + 1;
+ // find l = pivot index
+ l = idamax(nlocal-k,col_k,k,1) + k;
+ if(lid==0) {
+ ipvt[k] = l;
+ }
+ }
+ // synchronise threads
+ Barrier.enterBarrier(barr);
+
+ // zero pivot implies this column already triangularized
+ boolean b;
+ atomic {
+ //System.printString("Atomic #3\t");
+ b=col_k[l]!=0;
+ }
+ if (b) {
+ Barrier.enterBarrier(barr);
+ // interchange if necessary
+ if(lid == 0 ) {
+ if (l != k) {
+ atomic {
+ t = col_k[l];
+ col_k[l] = col_k[k];
+ col_k[k] = t;
+ }
+ }
+ }
+ // synchronise threads
+ Barrier.enterBarrier(barr);
+ // compute multipliers
+ // t = -1.0/col_k[k];
+ if(lid == 0) {
+ atomic {
+ t = -1.0/col_k[k];
+ dscal(nlocal-(kp1),t,col_k,kp1,1);
+ }
+ }
+
+ // synchronise threads
+ Barrier.enterBarrier(barr);
+
+ // row elimination with column indexing
+ atomic {
+ //System.printString("Atomic #4\t");
+ slice = ((nlocal-kp1) + nthreads-1)/nthreads;
+ ilow = (lid*slice)+kp1;
+ iupper = ((lid+1)*slice)+kp1;
+ if (iupper > nlocal ) iupper=nlocal;
+ if (ilow > nlocal ) ilow=nlocal;
+ //System.printString("ilow= " + ilow + " iupper= " + iupper + "\n");
+ for (j = ilow; j < iupper; j++) {
+ col_j = a[j];
+ t = col_j[l];
+ if (l != k) {
+ col_j[l] = col_j[k];
+ col_j[k] = t;
+ }
+ daxpy(nlocal-(kp1),t,col_k,kp1,1,
+ col_j,kp1,1);
+ }
+ }
+
+ // synchronise threads
+ Barrier.enterBarrier(barr);
+ } else {
+ info = k;
+ }
+ Barrier.enterBarrier(barr);
}
-
- nm1 = nlocal - 1;
- if (nm1 >= 0) {
- for (k = 0; k < nm1; k++) {
- atomic {
- col_k = a[k];
- kp1 = k + 1;
- // find l = pivot index
- l = idamax(nlocal-k,col_k,k,1) + k;
- if(lid==0) {
- ipvt[k] = l;
- }
- }
- // synchronise threads
- Barrier.enterBarrier(tmpbr);
- System.clearPrefetchCache();
-
- // zero pivot implies this column already triangularized
- boolean b;
- atomic {
- b=col_k[l]!=0;
- }
- if (b) {
- Barrier.enterBarrier(tmpbr);
- System.clearPrefetchCache();
- // interchange if necessary
- atomic {
- if(lid == 0 ) {
- if (l != k) {
- t = col_k[l];
- col_k[l] = col_k[k];
- col_k[k] = t;
- }
- }
- }
- // synchronise threads
- Barrier.enterBarrier(tmpbr);
- System.clearPrefetchCache();
- // compute multipliers
- atomic {
- t = -1.0/col_k[k];
- if(lid == 0) {
- dscal(nlocal-(kp1),t,col_k,kp1,1);
- }
- }
- // synchronise threads
- Barrier.enterBarrier(tmpbr);
- System.clearPrefetchCache();
- // row elimination with column indexing
- atomic {
- slice = ((nlocal-kp1) + nthreads-1)/nthreads;
- ilow = (lid*slice)+kp1;
- iupper = ((lid+1)*slice)+kp1;
- if (iupper > nlocal ) iupper=nlocal;
- if (ilow > nlocal ) ilow=nlocal;
- for (j = ilow; j < iupper; j++) {
- col_j = a[j];
- t = col_j[l];
- if (l != k) {
- col_j[l] = col_j[k];
- col_j[k] = t;
- }
- daxpy(nlocal-(kp1),t,col_k,kp1,1,
- col_j,kp1,1);
- }
- }
- // synchronise threads
- Barrier.enterBarrier(tmpbr);
- System.clearPrefetchCache();
- } else {
- info = k;
- }
- Barrier.enterBarrier(tmpbr);
- System.clearPrefetchCache();
- }
+ }
+
+ //atomic {
+ //System.printString("Atomic #5\t");
+ if(lid==0) {
+ atomic {
+ ipvt[nlocal-1] = nlocal-1;
+ }
}
-
atomic {
- if(lid==0) {
- ipvt[nlocal-1] = nlocal-1;
- }
- if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1;
+ if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1;
}
+ //}
}
/*
}
}
}
-
JGFLUFactBench.java \
JGFInstrumentor.java \
JGFTimer.java \
-Barrier.java \
LinpackRunner.java
-FLAGS=-dsm -prefetch -nooptimize -debug -profile -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -trueprob 0.95
-FLAGS2=-dsm -nooptimize -debug -profile -mainclass ${MAINCLASS}
+FLAGS=-dsm -prefetch -dsmcaching -optimize -excprefetch JGFLUFactBench.JGFLUFactBench -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -trueprob 0.90
+FLAGS2=-dsm -optimize -mainclass ${MAINCLASS}
default:
#../../../../buildscript ${FLAGS2} ${SRC}
# ../../../../buildscript ${FLAGS} ${SRC}
- ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}1NP ${SRC}
+ ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}1NPNC ${SRC}
../../../../buildscript ${FLAGS} -o ${MAINCLASS}1 ${SRC}
- cp ${MAINCLASS}1NP.bin ${MAINCLASS}2NP.bin
+ cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}2NPNC.bin
cp ${MAINCLASS}1.bin ${MAINCLASS}2.bin
- cp ${MAINCLASS}1NP.bin ${MAINCLASS}3NP.bin
+ cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}3NPNC.bin
cp ${MAINCLASS}1.bin ${MAINCLASS}3.bin
- cp ${MAINCLASS}1NP.bin ${MAINCLASS}4NP.bin
+ cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}4NPNC.bin
cp ${MAINCLASS}1.bin ${MAINCLASS}4.bin
clean: