From 0573a8d7e6d48b348b51ffa1f62b9937b3e0350a Mon Sep 17 00:00:00 2001
From: adash <adash>
Date: Fri, 30 Jan 2009 04:09:07 +0000
Subject: [PATCH] Manual prefetch versions

---
 .../MatrixMultiply/MatrixMultiplyN.java       | 198 ++++++++++++++++++
 .../ManualPrefetch/MatrixMultiply/makefile    |  13 ++
 2 files changed, 211 insertions(+)
 create mode 100644 Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java
 create mode 100644 Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile
diff --git a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java
new file mode 100644
index 00000000..d0bba69c
--- /dev/null
+++ b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java
@@ -0,0 +1,198 @@
+public class MatrixMultiply extends Thread{ // worker thread that computes one block of mmul.c
+    MMul mmul;
+    public int x0, y0, x1, y1; // run() iterates rows i in [x0, x1) and columns j in [y0, y1)
+    public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) { // note the argument order: x0, x1, y0, y1
+	this.mmul = mmul;
+	this.x0 = x0;
+	this.y0 = y0;
+	this.x1 = x1;
+	this.y1 = y1;
+    //System.printString("x0 = " +x0+" x1= "+x1+" y0= "+y0+" y1= "+y1+"\n");
+    }
+    // Requests range prefetches for a, btranspose and c, then fills c[i][j] for this worker's rows and columns, all in one atomic block.
+    public void run() {
+	atomic {
+        // Prefetch mmul.a[][] matrix
+        Object o = mmul;
+        short[] offsets = new short[4]; // NOTE(review): entry meanings are defined by System.rangePrefetch, which is not visible here - confirm
+        offsets[0] = getoffset{MMul, a};
+        offsets[1] = (short) 0;
+        offsets[2] = (short) x0;
+        //offsets[3] = (short) (x1 - x0);
+        offsets[3] = (short) 10; // hard-coded 10 replaces the commented-out (x1 - x0) above
+        System.rangePrefetch(o, offsets);
+
+        // Prefetch mmul.btranspose[][] matrix
+        Object o1 = mmul;
+        short[] offsets1 = new short[4];
+        offsets1[0] = getoffset{MMul, btranspose};
+        offsets1[1] = (short) 0;
+        offsets1[2] = (short) x0; // NOTE(review): the loop below indexes btranspose by j in [y0, y1), not by x0 - confirm intent
+       // offsets1[3] = (short) (x1 - x0);
+        offsets1[3] = (short) 10;
+        System.rangePrefetch(o1, offsets1);
+
+        // Prefetch mmul.c[][] matrix
+        Object o2 = mmul;
+        short[] offsets2 = new short[4];
+        offsets2[0] = getoffset{MMul, c};
+        offsets2[1] = (short) 0;
+        offsets2[2] = (short) x0;
+        //offsets2[3] = (short) (x1 - x0);
+        offsets2[3] = (short) 10;
+        System.rangePrefetch(o2, offsets2);
+
+	    double la[][]=mmul.a;
+	    double lc[][]=mmul.c;
+	    double lb[][]=mmul.btranspose;
+	    int M=mmul.M;
+        //Use btranspose for cache performance
+	    for(int i = x0; i< x1; i++){
+		double a[]=la[i];
+		double c[]=lc[i];
+		for (int j = y0; j < y1; j++) {
+		    double innerProduct=0;
+		    double b[] = lb[j]; // row j of btranspose
+		    for(int k = 0; k < M; k++) {
+			innerProduct += a[k] *b[k];
+		    }
+		    c[j]=innerProduct;
+		}
+	    }
+	}
+    }
+    // Entry point: optional args[0] overrides NUM_THREADS (default 4) and args[1] overrides SIZE (default 600).
+    public static void main(String[] args) {
+	int NUM_THREADS = 4;
+	int SIZE=600;
+	if (args.length>0) {
+	    NUM_THREADS=Integer.parseInt(args[0]);
+	    if (args.length>1)
+		SIZE=Integer.parseInt(args[1]);
+	}
+	// Each mid entry packs four 8-bit values into one int; mid[i] is later passed to start() for worker i.
+	int[] mid = new int[8];
+	mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
+	mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
+	mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
+	mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
+	mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
+	mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
+	mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
+	mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
+ 
+	int p, q, r;
+	MatrixMultiply[] mm;
+	MatrixMultiply tmp;
+	MMul matrix;
+	// Allocate the matrices and the workers with global new, splitting the SIZE rows into NUM_THREADS bands.
+	atomic {
+	    matrix = global new MMul(SIZE, SIZE, SIZE);
+	    matrix.setValues();
+	    matrix.transpose();
+	    mm = global new MatrixMultiply[NUM_THREADS];
+	    int increment=SIZE/NUM_THREADS; // rows per band (integer division)
+	    int base=0;
+	    for(int i=0;i<NUM_THREADS;i++) {
+		if ((i+1)==NUM_THREADS) // last band absorbs the remainder and runs to SIZE
+		    mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
+		else
+		    mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
+		base+=increment;
+	    }
+	    p = matrix.L;
+	    q = matrix.M;
+	    r = matrix.N;
+	}
+	
+	// print the matrix dimensions L, M and N
+	System.printString("\n");
+	System.printString("MatrixMultiply: L=");
+	System.printInt(p);
+	System.printString("\t");
+	System.printString("M=");
+	System.printInt(q);
+	System.printString("\t");
+	System.printString("N=");
+	System.printInt(r);
+	System.printString("\n");
+	
+	// start one worker per row band, passing mid[i] to start()
+	for (int i = 0; i < NUM_THREADS; i++) {
+	    atomic {
+		tmp = mm[i];
+	    }
+	    tmp.start(mid[i]); // NOTE(review): mid has 8 entries, so NUM_THREADS > 8 indexes past its end
+	}
+
+	
+	// wait for them to finish
+	for (int i = 0; i < NUM_THREADS; i++) {
+	    atomic {
+		tmp = mm[i];
+	    }
+	    tmp.join();
+	}
+	
+	// the product itself is not printed; only a completion message is written
+
+	System.printString("Finished\n");
+    }
+}
+
+public class MMul{ // operands a (LxM) and b (MxN), result c (LxN), and btranspose (NxM)
+
+	public int L, M, N;
+	public double[][] a;
+	public double[][] b;
+	public double[][] c;
+	public double[][] btranspose;
+	// Stores the dimensions and allocates the four matrices with global new.
+	public MMul(int L, int M, int N) {
+		this.L = L;
+		this.M = M;
+		this.N = N;
+		a = global new double[L][M];  
+		b = global new double[M][N]; 
+		c = global new double[L][N]; 
+		btranspose = global new double[N][M];
+	}
+	// Sets a[i][j] and b[i][j] to j+1 and zeroes every element of c and btranspose.
+	public void setValues() {
+		for(int i = 0; i < L; i++) {
+            double ai[] = a[i];
+			for(int j = 0; j < M; j++) {
+				ai[j] = j+1;
+			}
+		}
+
+		for(int i = 0; i < M; i++) {
+            double bi[] = b[i];
+			for(int j = 0; j < N; j++) {
+				bi[j] = j+1;
+			}
+		}
+
+		for(int i = 0; i < L; i++) {
+            double ci[] = c[i];
+			for(int j = 0; j < N; j++) {
+				ci[j] = 0;
+			}
+		}
+		for(int i = 0; i < N; i++) {
+            double btransposei[] = btranspose[i];
+			for(int j = 0; j < M; j++) {
+				btransposei[j] = 0;
+			}
+		}
+	}
+	// Writes the transpose of b into btranspose: btranspose[col][row] = b[row][col].
+	public void transpose() {
+		for(int row = 0; row < M; row++) {
+            double brow[] = b[row];
+			for(int col = 0; col < N; col++) {
+				btranspose[col][row] = brow[col];
+			}
+		}
+	}
+}
diff --git a/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile
new file mode 100644
index 00000000..22d733c4
--- /dev/null
+++ b/Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/makefile
@@ -0,0 +1,13 @@
+MAINCLASS=MatrixMultiply
+SRC1=${MAINCLASS}N.java
+FLAGS=-dsm -dsmcaching -prefetch -optimize -excprefetch MatrixMultiply.main -excprefetch MMul.setValues -excprefetch MMul.transpose -mainclass ${MAINCLASS} -trueprob 0.98
+FLAGS1=-dsm -dsmcaching -rangeprefetch -optimize -excprefetch MatrixMultiply.main -excprefetch MMul.setValues -excprefetch MMul.transpose -mainclass ${MAINCLASS} -trueprob 0.98
+FLAGS2=-dsm -optimize -mainclass ${MAINCLASS}
+default: # builds three variants of SRC1: NPNC (FLAGS2), RangePN (FLAGS1) and N (FLAGS)
+	../../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC ${SRC1}
+	../../../../buildscript ${FLAGS1} -o ${MAINCLASS}RangePN ${SRC1}
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS}N ${SRC1}
+# clean removes the build directory and the *.bin outputs; -f keeps it from failing when no .bin file exists.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
-- 
2.34.1