public void run() {
atomic {
+ double la[][]=mmul.a;
+ double lc[][]=mmul.c;
+ double lb[][]=mmul.btranspose;
+ int M=mmul.M;
+
//Use btranspose for cache performance
- for(int i = x0; i<= x1; i++){
- double a[]=mmul.a[i];
- double c[]=mmul.c[i];
- int M=mmul.M;
- for (int j = y0; j <= y1; j++) {
+ for(int i = x0; i< x1; i++){
+ double a[]=la[i];
+ double c[]=lc[i];
+ for (int j = y0; j < y1; j++) {
double innerProduct=0;
- double b[] = mmul.btranspose[j];
+ double b[] = lb[j];
for(int k = 0; k < M; k++) {
innerProduct += a[k] *b[k];
}
SIZE=Integer.parseInt(args[1]);
}
- int[] mid = new int[NUM_THREADS];
- mid[0] = (128<<24)|(195<<16)|(175<<8)|80;
- mid[1] = (128<<24)|(195<<16)|(175<<8)|73;
- mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
+ int[] mid = new int[4];
+ mid[0] = (128<<24)|(195<<16)|(175<<8)|69;
+ mid[1] = (128<<24)|(195<<16)|(175<<8)|70;
+ mid[2] = (128<<24)|(195<<16)|(175<<8)|71;
mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
int p, q, r;
MatrixMultiply[] mm;
int base=0;
for(int i=0;i<NUM_THREADS;i++) {
if ((i+1)==NUM_THREADS)
- mm[i]=global new MatrixMultiply(matrix,base, SIZE-1, 0, SIZE-1);
+ mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
else
- mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE-1);
+ mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
base+=increment;
}
p = matrix.L;