Manual prefetch versions
author adash <adash>
Fri, 30 Jan 2009 04:09:07 +0000 (04:09 +0000)
committer adash <adash>
Fri, 30 Jan 2009 04:09:07 +0000 (04:09 +0000)
Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java [new file with mode: 0644]
Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile [new file with mode: 0644]

diff --git a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java
new file mode 100644 (file)
index 0000000..d0bba69
--- /dev/null
@@ -0,0 +1,198 @@
+public class MatrixMultiply extends Thread{ // Worker thread: fills c[i][j] = sum over k of a[i][k] * btranspose[j][k] for i in [x0, x1), j in [y0, y1).
+    MMul mmul; // matrices this worker reads (a, btranspose) and writes (c)
+    public int x0, y0, x1, y1; // row range [x0, x1) and column range [y0, y1) of c handled by this thread
+    public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) { // NOTE: argument order is (x0, x1, y0, y1), unlike the field declaration order
+       this.mmul = mmul;
+       this.x0 = x0;
+       this.y0 = y0;
+       this.x1 = x1;
+       this.y1 = y1;
+    //System.printString("x0 = " +x0+" x1= "+x1+" y0= "+y0+" y1= "+y1+"\n");
+    }
+    
+    public void run() { // Issues three range prefetches (a, btranspose, c), then computes this thread's block of c.
+       atomic { // the prefetch calls and the whole computation sit inside this single atomic block
+        // Prefetch mmul.a[][]: offsets = {getoffset of field a, 0, x0, 10}; what each slot means is defined by System.rangePrefetch (not shown here)
+        Object o = mmul;
+        short[] offsets = new short[4];
+        offsets[0] = getoffset{MMul, a};
+        offsets[1] = (short) 0;
+        offsets[2] = (short) x0;
+        //offsets[3] = (short) (x1 - x0);
+        offsets[3] = (short) 10; // hard-coded 10 replaces the (x1 - x0) variant commented out above
+        System.rangePrefetch(o, offsets);
+
+        // Prefetch mmul.btranspose[][], same offset layout. NOTE(review): the loop below indexes btranspose by j in [y0, y1), yet this prefetch starts at x0 -- confirm intended
+        Object o1 = mmul;
+        short[] offsets1 = new short[4];
+        offsets1[0] = getoffset{MMul, btranspose};
+        offsets1[1] = (short) 0;
+        offsets1[2] = (short) x0;
+       // offsets1[3] = (short) (x1 - x0);
+        offsets1[3] = (short) 10;
+        System.rangePrefetch(o1, offsets1);
+
+        // Prefetch mmul.c[][], same offset layout as for a
+        Object o2 = mmul;
+        short[] offsets2 = new short[4];
+        offsets2[0] = getoffset{MMul, c};
+        offsets2[1] = (short) 0;
+        offsets2[2] = (short) x0;
+        //offsets2[3] = (short) (x1 - x0);
+        offsets2[3] = (short) 10;
+        System.rangePrefetch(o2, offsets2);
+
+           double la[][]=mmul.a; // local references to the three matrices and the inner dimension M
+           double lc[][]=mmul.c;
+           double lb[][]=mmul.btranspose;
+           int M=mmul.M;
+        // Use btranspose for cache performance: the inner loop then walks one row of a and one row of btranspose with the same index k
+           for(int i = x0; i< x1; i++){
+               double a[]=la[i]; // row i of a
+               double c[]=lc[i]; // row i of c, written below
+               for (int j = y0; j < y1; j++) {
+                   double innerProduct=0;
+                   double b[] = lb[j]; // row j of btranspose
+                   for(int k = 0; k < M; k++) {
+                       innerProduct += a[k] *b[k];
+                   }
+                   c[j]=innerProduct;
+               }
+           }
+       }
+    }
+    
+    public static void main(String[] args) { // args[0] = number of threads (default 4), args[1] = SIZE used for all three matrix dimensions (default 600)
+       int NUM_THREADS = 4;
+       int SIZE=600;
+       if (args.length>0) {
+           NUM_THREADS=Integer.parseInt(args[0]);
+           if (args.length>1)
+               SIZE=Integer.parseInt(args[1]);
+       }
+       
+       int[] mid = new int[8]; // values handed to start() below, each packing the bytes 128.195.136.x into one int (presumably host addresses -- confirm). NOTE(review): only 8 entries, so NUM_THREADS > 8 indexes past the end at mid[i]
+       mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
+       mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
+       mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
+       mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
+       mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
+       mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
+       mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
+       mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
+       int p, q, r;
+       MatrixMultiply[] mm;
+       MatrixMultiply tmp;
+       MMul matrix;
+       
+       atomic { // build the matrices and one worker per band of rows
+           matrix = global new MMul(SIZE, SIZE, SIZE);
+           matrix.setValues();
+           matrix.transpose();
+           mm = global new MatrixMultiply[NUM_THREADS];
+           int increment=SIZE/NUM_THREADS; // rows per worker (integer division). NOTE(review): NUM_THREADS == 0 divides by zero here
+           int base=0;
+           for(int i=0;i<NUM_THREADS;i++) {
+               if ((i+1)==NUM_THREADS) // the last worker runs up to SIZE, so it also takes the remainder rows
+                   mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
+               else
+                   mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
+               base+=increment;
+           }
+           p = matrix.L; // copy the dimensions into locals for the printout below
+           q = matrix.M;
+           r = matrix.N;
+       }
+       
+       // print the matrix dimensions L, M and N
+       System.printString("\n");
+       System.printString("MatrixMultiply: L=");
+       System.printInt(p);
+       System.printString("\t");
+       System.printString("M=");
+       System.printInt(q);
+       System.printString("\t");
+       System.printString("N=");
+       System.printInt(r);
+       System.printString("\n");
+       
+       // start the workers: worker i handles one band of rows of c and is started with mid[i]
+       for (int i = 0; i < NUM_THREADS; i++) {
+           atomic {
+               tmp = mm[i];
+           }
+           tmp.start(mid[i]);
+       }
+
+       
+       // wait for them to finish
+       for (int i = 0; i < NUM_THREADS; i++) {
+           atomic {
+               tmp = mm[i];
+           }
+           tmp.join();
+       }
+       
+       // report completion (the result matrix itself is not printed)
+
+       System.printString("Finished\n");
+    }
+}
+
+public class MMul{ // Container for the operands a (L x M) and b (M x N), the result c (L x N), and btranspose (N x M).
+
+       public int L, M, N; // dimensions: a is L x M, b is M x N, c is L x N
+       public double[][] a;
+       public double[][] b;
+       public double[][] c;
+       public double[][] btranspose; // N x M; filled from b by transpose()
+
+       public MMul(int L, int M, int N) { // Stores the dimensions and allocates all four matrices with "global new".
+               this.L = L;
+               this.M = M;
+               this.N = N;
+               a = global new double[L][M];  
+               b = global new double[M][N]; 
+               c = global new double[L][N]; 
+               btranspose = global new double[N][M];
+       }
+
+       public void setValues() { // Sets a[i][j] and b[i][j] to j+1 for every row, and zeroes c and btranspose.
+               for(int i = 0; i < L; i++) {
+            double ai[] = a[i];
+                       for(int j = 0; j < M; j++) {
+                               ai[j] = j+1;
+                       }
+               }
+
+               for(int i = 0; i < M; i++) {
+            double bi[] = b[i];
+                       for(int j = 0; j < N; j++) {
+                               bi[j] = j+1;
+                       }
+               }
+
+               for(int i = 0; i < L; i++) {
+            double ci[] = c[i];
+                       for(int j = 0; j < N; j++) {
+                               ci[j] = 0;
+                       }
+               }
+               for(int i = 0; i < N; i++) {
+            double btransposei[] = btranspose[i];
+                       for(int j = 0; j < M; j++) {
+                               btransposei[j] = 0;
+                       }
+               }
+       }
+
+       public void transpose() { // Copies b into btranspose so that btranspose[col][row] == b[row][col].
+               for(int row = 0; row < M; row++) {
+            double brow[] = b[row];
+                       for(int col = 0; col < N; col++) {
+                               btranspose[col][row] = brow[col];
+                       }
+               }
+       }
+}
diff --git a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile
new file mode 100644 (file)
index 0000000..22d733c
--- /dev/null
@@ -0,0 +1,13 @@
+MAINCLASS=MatrixMultiply
+SRC1=${MAINCLASS}N.java
+FLAGS=-dsm -dsmcaching -prefetch -optimize -excprefetch MatrixMultiply.main -excprefetch MMul.setValues -excprefetch MMul.transpose -mainclass ${MAINCLASS} -trueprob 0.98
+FLAGS1=-dsm -dsmcaching -rangeprefetch -optimize -excprefetch MatrixMultiply.main -excprefetch MMul.setValues -excprefetch MMul.transpose -mainclass ${MAINCLASS} -trueprob 0.98
+FLAGS2=-dsm -optimize -mainclass ${MAINCLASS}
+default: # builds three binaries from ${SRC1}: NPNC with FLAGS2 (-dsm only), RangePN with FLAGS1 (-rangeprefetch), N with FLAGS (-prefetch)
+       ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC ${SRC1}
+       ../../../../buildscript ${FLAGS1} -o ${MAINCLASS}RangePN ${SRC1}
+       ../../../../buildscript ${FLAGS} -o ${MAINCLASS}N ${SRC1}
+
+clean: # removes the build directory and the generated .bin files; -f keeps the target from failing when there is nothing to remove
+       rm -rf tmpbuilddirectory
+       rm -f *.bin