From: adash Date: Mon, 11 Jan 2010 07:35:47 +0000 (+0000) Subject: changes for manual prefetch in matrix multiply X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e2e19ce636e3773c13370796b8ed9123735ef3b9;p=IRC.git changes for manual prefetch in matrix multiply --- diff --git a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyD3.java b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyD3.java new file mode 100644 index 00000000..5c956b71 --- /dev/null +++ b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyD3.java @@ -0,0 +1,210 @@ +public class MatrixMultiply extends Thread{ + MMul mmul; + public int x0, y0, x1, y1; + public int tid, numthreads; + + public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1, int tid, int numthreads) { + this.mmul = mmul; + this.x0 = x0; + this.y0 = y0; + this.x1 = x1; + this.y1 = y1; + this.tid=tid; + this.numthreads=numthreads; + } + + public void run() { + Barrier barr=new Barrier("128.195.136.162"); + atomic { + mmul.setValues(tid, numthreads); + } + + Barrier.enterBarrier(barr); + + atomic { + short[] offsets = new short[4]; + // Prefetch mmul.btranspose[][] matrix + //Get all of B first...we need them first + offsets[0] = getoffset{MMul, btranspose}; + offsets[1] = (short) 0; + offsets[2] = (short) y0; + offsets[3] = (short) (y1 - y0 -1); + System.rangePrefetch(mmul, offsets); + + //Get first part of A + offsets[0] = getoffset{MMul, a}; + offsets[1] = (short) 0; + offsets[2] = (short) x0; + offsets[3] = (short) 15; + System.rangePrefetch(mmul, offsets); + + //Get first part of C + offsets[0] = getoffset{MMul, c}; + offsets[1] = (short) 0; + System.rangePrefetch(mmul, offsets); + short[] offsets2=new short[2]; + + double la[][][]=mmul.a; + double lc[][][]=mmul.c; + double lb[][][]=mmul.btranspose; + int M=mmul.M; + int P=mmul.P; + //Use btranspose for cache performance + for(int q=0;qx1) { + int x=x1-x0-l-1; + if (x>0) { + offsets2[1]=(short) x; + System.rangePrefetch(la, offsets2); + System.rangePrefetch(lc, offsets2); + } + } else { + offsets2[1] = (short) 15; + System.rangePrefetch(la, offsets2); + System.rangePrefetch(lc, offsets2); + } + } + for (int j = y0; j < y1; j++) { + double innerProduct=0; + double b[] = rb[j]; + for(int k = 0; k < M; k++) { + innerProduct += a[k] * b[k]; + } + c[j]=innerProduct; + } + } + } + } + } + + public static void main(String[] args) { + int NUM_THREADS = 4; + int SIZE=150; + int NUM_MATRIX = 1; + if (args.length>0) { + NUM_THREADS=Integer.parseInt(args[0]); + if (args.length>1) { + SIZE=Integer.parseInt(args[1]); + if (args.length>2) + NUM_MATRIX=Integer.parseInt(args[2]); + } + } + + int[] mid = new int[8]; + mid[0] = (128<<24)|(195<<16)|(136<<8)|162; + mid[1] = (128<<24)|(195<<16)|(136<<8)|163; + mid[2] = (128<<24)|(195<<16)|(136<<8)|164; + mid[3] = (128<<24)|(195<<16)|(136<<8)|165; + mid[4] = (128<<24)|(195<<16)|(136<<8)|166; + mid[5] = (128<<24)|(195<<16)|(136<<8)|167; + mid[6] = (128<<24)|(195<<16)|(136<<8)|168; + mid[7] = (128<<24)|(195<<16)|(136<<8)|169; + + int p, q, r; + MatrixMultiply[] mm; + MatrixMultiply tmp; + MMul matrix; + BarrierServer mybarr; + + atomic { + mybarr = global new BarrierServer(NUM_THREADS); + } + mybarr.start(mid[0]); + + + System.out.println("NUM_MATRIX= "+NUM_MATRIX+" SIZE= "+SIZE); + atomic { + matrix = global new MMul(NUM_MATRIX, SIZE, SIZE, SIZE); + mm = global new MatrixMultiply[NUM_THREADS]; + int increment=SIZE/NUM_THREADS; + int base=0; + for(int i=0;i