public class MatrixMultiply extends Thread{
MMul mmul;
public int x0, y0, x1, y1;
-
public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) {
this.mmul = mmul;
this.x0 = x0;
public void run() {
atomic {
+ double la[][]=mmul.a;
+ double lc[][]=mmul.c;
+ double lb[][]=mmul.btranspose;
+ int M=mmul.M;
+
// Use btranspose so the inner product reads a row of each operand (better cache performance)
- for(int i = x0; i<= x1; i++){
- double a[]=mmul.a[i];
- double c[]=mmul.c[i];
- int M=mmul.M;
- for (int j = y0; j <= y1; j++) {
+ for(int i = x0; i< x1; i++){
+ double a[]=la[i];
+ double c[]=lc[i];
+ for (int j = y0; j < y1; j++) {
double innerProduct=0;
- double b[] = mmul.btranspose[j];
+ double b[] = lb[j];
for(int k = 0; k < M; k++) {
innerProduct += a[k] *b[k];
}
SIZE=Integer.parseInt(args[1]);
}
- int[] mid = new int[4];
- mid[0] = (128<<24)|(195<<16)|(175<<8)|80;
- mid[1] = (128<<24)|(195<<16)|(175<<8)|73;
- mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
- mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
+ int[] mid = new int[8];
+ mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
+ mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dw-11
+ mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dw-12
+ mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dw-13
+ mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dw-14
+ mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dw-15
+ mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dw-16
+ mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dw-17
+
int p, q, r;
MatrixMultiply[] mm;
MatrixMultiply tmp;
int base=0;
for(int i=0;i<NUM_THREADS;i++) {
if ((i+1)==NUM_THREADS)
- mm[i]=global new MatrixMultiply(matrix,base, SIZE-1, 0, SIZE-1);
+ mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
else
- mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE-1);
+ mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
base+=increment;
}
p = matrix.L;
}
tmp.start(mid[i]);
}
+
// Wait for the started threads to finish
for (int i = 0; i < NUM_THREADS; i++) {