From: bdemsky Date: Tue, 10 Mar 2009 19:48:56 +0000 (+0000) Subject: changes to prefetch...hope it makes a difference... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b0d7bca57c8a4ccea361d16b196240395af8f315;p=IRC.git changes to prefetch...hope it makes a difference... --- diff --git a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java index ae2ae5d1..5b2d22cb 100644 --- a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java +++ b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java @@ -11,41 +11,51 @@ public class MatrixMultiply extends Thread{ public void run() { atomic { - // Prefetch mmul.a[][] matrix - Object o = mmul; short[] offsets = new short[4]; + // Prefetch mmul.btranspose[][] matrix + //Get all of B first...we need them first + offsets[0] = getoffset{MMul, btranspose}; + offsets[1] = (short) 0; + offsets[2] = (short) y0; + offsets[3] = (short) (y1 - y0 -1); + System.rangePrefetch(mmul, offsets); + + //Get first part of A offsets[0] = getoffset{MMul, a}; offsets[1] = (short) 0; offsets[2] = (short) x0; - offsets[3] = (short) (x1 - x0 -1); - System.rangePrefetch(o, offsets); - - // Prefetch mmul.btranspose[][] matrix - Object o1 = mmul; - short[] offsets1 = new short[4]; - offsets1[0] = getoffset{MMul, btranspose}; - offsets1[1] = (short) 0; - offsets1[2] = (short) x0; - offsets1[3] = (short) (x1 - x0 -1); - System.rangePrefetch(o1, offsets1); + offsets[3] = (short) 63; + System.rangePrefetch(mmul, offsets); - // Prefetch mmul.c[][] matrix - Object o2 = mmul; - short[] offsets2 = new short[4]; - offsets2[0] = getoffset{MMul, c}; - offsets2[1] = (short) 0; - offsets2[2] = (short) x0; - offsets2[3] = (short) (x1 - x0 -1); - System.rangePrefetch(o2, offsets2); + //Get first part of C + offsets[0] = getoffset{MMul, c}; + offsets[1] = (short) 0; + System.rangePrefetch(mmul, offsets); double la[][]=mmul.a; double lc[][]=mmul.c; double lb[][]=mmul.btranspose; int M=mmul.M; + int l=0; //Use btranspose for cache performance - for(int i = x0; i< x1; i++){ + for(int i = x0; i< x1; i++,l++){ double a[]=la[i]; double c[]=lc[i]; + if (((l+32)&63)==0) { + offsets[0] = getoffset{MMul, a}; + offsets[1] = (short) 0; + offsets[2] = (short) x0+l; + if ((x0+l+64)>x1) + offsets[3]=x1-x0-l-1; + else + offsets[3] = (short) 63; + System.rangePrefetch(mmul, offsets); + + //Get first part of C + offsets[0] = getoffset{MMul, c}; + offsets[1] = (short) 0; + System.rangePrefetch(mmul, offsets); + } for (int j = y0; j < y1; j++) { double innerProduct=0; double b[] = lb[j];