From 9f9e19c7df5d69288e4bb0f53ea068fb3afe12d0 Mon Sep 17 00:00:00 2001 From: adash Date: Wed, 12 Nov 2008 04:09:34 +0000 Subject: [PATCH] latest changes to 2DFFT and LUFact benchmark --- .../Benchmarks/Prefetch/2DFFT/dsm/fft2d.java | 23 +- .../Prefetch/LUFact/dsm/JGFLUFactBench.java | 39 ++- .../LUFact/dsm/JGFLUFactBenchSizeA.java | 1 + .../Prefetch/LUFact/dsm/LinpackRunner.java | 239 +++++++++--------- .../Benchmarks/Prefetch/LUFact/dsm/makefile | 13 +- 5 files changed, 171 insertions(+), 144 deletions(-) diff --git a/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java b/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java index 97724138..3c7a6563 100644 --- a/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java +++ b/Robust/src/Benchmarks/Prefetch/2DFFT/dsm/fft2d.java @@ -53,14 +53,7 @@ public class fft2d extends Thread { // Tranpose data. if (start == 0) { atomic { - for(int i = 0; i= 0) { + //System.printString("nm1 = " +nm1+ "\n"); + for (k = 0; k < nm1; k++) { + atomic { + //System.printString("Atomic #2\t"); + col_k = a[k]; + kp1 = k + 1; + // find l = pivot index + l = idamax(nlocal-k,col_k,k,1) + k; + if(lid==0) { + ipvt[k] = l; + } + } + // synchronise threads + Barrier.enterBarrier(barr); + + // zero pivot implies this column already triangularized + boolean b; + atomic { + //System.printString("Atomic #3\t"); + b=col_k[l]!=0; + } + if (b) { + Barrier.enterBarrier(barr); + // interchange if necessary + if(lid == 0 ) { + if (l != k) { + atomic { + t = col_k[l]; + col_k[l] = col_k[k]; + col_k[k] = t; + } + } + } + // synchronise threads + Barrier.enterBarrier(barr); + // compute multipliers + // t = -1.0/col_k[k]; + if(lid == 0) { + atomic { + t = -1.0/col_k[k]; + dscal(nlocal-(kp1),t,col_k,kp1,1); + } + } + + // synchronise threads + Barrier.enterBarrier(barr); + + // row elimination with column indexing + atomic { + //System.printString("Atomic #4\t"); + slice = ((nlocal-kp1) + nthreads-1)/nthreads; + ilow = (lid*slice)+kp1; + iupper = ((lid+1)*slice)+kp1; + if (iupper > nlocal ) iupper=nlocal; + if (ilow > nlocal ) ilow=nlocal; + //System.printString("ilow= " + ilow + " iupper= " + iupper + "\n"); + for (j = ilow; j < iupper; j++) { + col_j = a[j]; + t = col_j[l]; + if (l != k) { + col_j[l] = col_j[k]; + col_j[k] = t; + } + daxpy(nlocal-(kp1),t,col_k,kp1,1, + col_j,kp1,1); + } + } + + // synchronise threads + Barrier.enterBarrier(barr); + } else { + info = k; + } + Barrier.enterBarrier(barr); } - - nm1 = nlocal - 1; - if (nm1 >= 0) { - for (k = 0; k < nm1; k++) { - atomic { - col_k = a[k]; - kp1 = k + 1; - // find l = pivot index - l = idamax(nlocal-k,col_k,k,1) + k; - if(lid==0) { - ipvt[k] = l; - } - } - // synchronise threads - Barrier.enterBarrier(tmpbr); - System.clearPrefetchCache(); - - // zero pivot implies this column already triangularized - boolean b; - atomic { - b=col_k[l]!=0; - } - if (b) { - Barrier.enterBarrier(tmpbr); - System.clearPrefetchCache(); - // interchange if necessary - atomic { - if(lid == 0 ) { - if (l != k) { - t = col_k[l]; - col_k[l] = col_k[k]; - col_k[k] = t; - } - } - } - // synchronise threads - Barrier.enterBarrier(tmpbr); - System.clearPrefetchCache(); - // compute multipliers - atomic { - t = -1.0/col_k[k]; - if(lid == 0) { - dscal(nlocal-(kp1),t,col_k,kp1,1); - } - } - // synchronise threads - Barrier.enterBarrier(tmpbr); - System.clearPrefetchCache(); - // row elimination with column indexing - atomic { - slice = ((nlocal-kp1) + nthreads-1)/nthreads; - ilow = (lid*slice)+kp1; - iupper = ((lid+1)*slice)+kp1; - if (iupper > nlocal ) iupper=nlocal; - if (ilow > nlocal ) ilow=nlocal; - for (j = ilow; j < iupper; j++) { - col_j = a[j]; - t = col_j[l]; - if (l != k) { - col_j[l] = col_j[k]; - col_j[k] = t; - } - daxpy(nlocal-(kp1),t,col_k,kp1,1, - col_j,kp1,1); - } - } - // synchronise threads - Barrier.enterBarrier(tmpbr); - System.clearPrefetchCache(); - } else { - info = k; - } - Barrier.enterBarrier(tmpbr); - System.clearPrefetchCache(); - } + } + + //atomic { + //System.printString("Atomic #5\t"); + if(lid==0) { + atomic { + ipvt[nlocal-1] = nlocal-1; + } } - atomic { - if(lid==0) { - ipvt[nlocal-1] = nlocal-1; - } - if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1; + if (a[(nlocal-1)][(nlocal-1)] == 0) info = nlocal-1; } + //} } /* @@ -230,4 +238,3 @@ class LinpackRunner extends Thread { } } } - diff --git a/Robust/src/Benchmarks/Prefetch/LUFact/dsm/makefile b/Robust/src/Benchmarks/Prefetch/LUFact/dsm/makefile index 3ad1cc01..100f37e2 100644 --- a/Robust/src/Benchmarks/Prefetch/LUFact/dsm/makefile +++ b/Robust/src/Benchmarks/Prefetch/LUFact/dsm/makefile @@ -3,21 +3,20 @@ SRC=${MAINCLASS}.java \ JGFLUFactBench.java \ JGFInstrumentor.java \ JGFTimer.java \ -Barrier.java \ LinpackRunner.java -FLAGS=-dsm -prefetch -nooptimize -debug -profile -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -trueprob 0.95 -FLAGS2=-dsm -nooptimize -debug -profile -mainclass ${MAINCLASS} +FLAGS=-dsm -prefetch -dsmcaching -optimize -excprefetch JGFLUFactBench.JGFLUFactBench -excprefetch JGFLUFactBench.JGFkernel -excprefetch JGFLUFactBench.dmxpy -excprefetch JGFLUFactBench.JGFvalidate -excprefetch JGFLUFactBench.JGFinitialise -excprefetch JGFLUFactBench.matgen -excprefetch JGFLUFactBench.dgesl -mainclass ${MAINCLASS} -trueprob 0.90 +FLAGS2=-dsm -optimize -mainclass ${MAINCLASS} default: #../../../../buildscript ${FLAGS2} ${SRC} # ../../../../buildscript ${FLAGS} ${SRC} - ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}1NP ${SRC} + ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}1NPNC ${SRC} ../../../../buildscript ${FLAGS} -o ${MAINCLASS}1 ${SRC} - cp ${MAINCLASS}1NP.bin ${MAINCLASS}2NP.bin + cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}2NPNC.bin cp ${MAINCLASS}1.bin ${MAINCLASS}2.bin - cp ${MAINCLASS}1NP.bin ${MAINCLASS}3NP.bin + cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}3NPNC.bin cp ${MAINCLASS}1.bin ${MAINCLASS}3.bin - cp ${MAINCLASS}1NP.bin ${MAINCLASS}4NP.bin + cp ${MAINCLASS}1NPNC.bin ${MAINCLASS}4NPNC.bin cp ${MAINCLASS}1.bin ${MAINCLASS}4.bin clean: -- 2.34.1