double tempinput[][] = img.inputImage;
double tempout[][] = img.outputImage;
- double tinput0[] = tempinput[x0];
- double tinput1[] = tempinput[x0+1];
- double tinput2[] = tempinput[x0+2];
- double tinput3[] = tempinput[x0+3];
- double tinput4[] = tempinput[x0+4];
-
- for(int i=x0;i<x1;++i){
- double tout[] = tempout[x0];
+ double tinput1[] = tempinput[x0];
+ double tinput2[] = tempinput[x0+1];
+ double tinput3[] = tempinput[x0+2];
+ double tinput4[] = tempinput[x0+3];
+ double tinput0[] = tinput1;
+
+ int l=x0+4;
+ for(int i=x0;i<x1;i++,l++){
+ double tout[] = tempout[i];
+ tinput0 = tinput1; tinput1=tinput2; tinput2=tinput3; tinput3=tinput4; tinput4=tempinput[l];
for(int j=y0;j<y1;++j){
- tout[y0] = 0;
+ tout[j] = 0;
for(int b=0;b<kernelHeight;++b){
- tout[y0] = tout[y0] + (tinput0[j+b] * kernel[0][b] + tinput1[j+b] * kernel[1][b] + tinput2[j+b]*kernel[2][b] +
+ tout[j] = tout[j] + (tinput0[j+b] * kernel[0][b] + tinput1[j+b] * kernel[1][b] + tinput2[j+b]*kernel[2][b] +
tinput3[j+b]*kernel[3][b] + tinput4[j+b]*kernel[4][b]);
}
}
- if(i != 4095) {
- tinput0 = tinput1; tinput1=tinput2; tinput2=tinput3; tinput3=tinput4; tinput4=tempinput[i+5];
- }
}
}
}
}
}
+ /*
+ atomic{
+ System.printString("img.outputImage[10][20] = " +(int) img.outputImage[10][20] + "\n");
+ System.printString("img.outputImage[256][890] = " +(int) img.outputImage[256][890] + "\n");
+ }
+ */
+
for(int i = 0; i <NUM_THREADS; i++) {
atomic {
tmp = conv[i];
tmp.join();
}
System.printString("Done!");
+
+ /*
+ atomic{
+ System.printString("img.outputImage[10][20] = " +(int) img.outputImage[10][20] + "\n");
+ System.printString("img.outputImage[256][890] = " +(int) img.outputImage[256][890] + "\n");
+ }
+ */
}
//define 5X5 Gaussian kernel
int kernelHeight = 5;
int kernelWidth = 5;
double[][] kernel = new double[kernelHeight][kernelWidth];
-
initKernel(kernel);
double tempinput[][] = img.inputImage;
double tempout[][] = img.outputImage;
- double tinput0[] = tempinput[x0];
- double tinput1[] = tempinput[x0+1];
- double tinput2[] = tempinput[x0+2];
- double tinput3[] = tempinput[x0+3];
- double tinput4[] = tempinput[x0+4];
+ double tinput1[] = tempinput[x0];
+ double tinput2[] = tempinput[x0+1];
+ double tinput3[] = tempinput[x0+2];
+ double tinput4[] = tempinput[x0+3];
+ double tinput0[] = tinput1;
- for(int i=x0;i<x1;++i){
- double tout[] = tempout[x0];
+ int l=x0+4;
+ for(int i=x0;i<x1;i++,l++){
+ double tout[] = tempout[i];
+ tinput0 = tinput1; tinput1=tinput2; tinput2=tinput3; tinput3=tinput4; tinput4=tempinput[l];
for(int j=y0;j<y1;++j){
- tout[y0] = 0;
+ tout[j] = 0;
for(int b=0;b<kernelHeight;++b){
- tout[y0] = tout[y0] + (tinput0[j+b] * kernel[0][b] + tinput1[j+b] * kernel[1][b] + tinput2[j+b]*kernel[2][b] +
+ tout[j] = tout[j] + (tinput0[j+b] * kernel[0][b] + tinput1[j+b] * kernel[1][b] + tinput2[j+b]*kernel[2][b] +
tinput3[j+b]*kernel[3][b] + tinput4[j+b]*kernel[4][b]);
}
}
- if(i != 8191) {
- tinput0 = tinput1; tinput1=tinput2; tinput2=tinput3; tinput3=tinput4; tinput4=tempinput[i+5];
- }
}
}
base+=increment;
}
+ //System.printString("img.outputImage[10][20] = " +(int) img.outputImage[10][20] + "\n");
+ //System.printString("img.outputImage[256][890] = " +(int) img.outputImage[256][890] + "\n");
for(int i = 0; i <NUM_THREADS; i++) {
tmp = conv[i];
tmp.run();
}
+ //System.printString("img.outputImage[10][20] = " +(int) img.outputImage[10][20] + "\n");
+ //System.printString("img.outputImage[256][890] = " +(int) img.outputImage[256][890] + "\n");
System.printString("Done!");
}
MAINCLASS=Convolution
SRC=${MAINCLASS}.java
default:
- ../../../../buildscript -nooptimize -debug -mainclass ${MAINCLASS} ${SRC} -o ${MAINCLASS}
+ ../../../../buildscript -optimize -mainclass ${MAINCLASS} ${SRC} -o ${MAINCLASS}
clean:
rm -rf tmpbuilddirectory
SRC=${MAINCLASS}.java \
fft1d.java \
Matrix.java
-FLAGS =-dsm -dsmcaching -prefetch -optimize -excprefetch fft2d.main -excprefetch fft2d.twiddle -excprefetch fft1d.factorize -excprefetch fft1d.printFactors -excprefetch Matrix.setValues -excprefetch Matrix.setZeros -trueprob 0.90 -mainclass ${MAINCLASS}
+FLAGS =-dsm -dsmcaching -prefetch -optimize -excprefetch fft2d.main -excprefetch fft2d.twiddle -excprefetch fft1d.factorize -excprefetch fft1d.printFactors -excprefetch Matrix.setValues -excprefetch Matrix.setZeros -excprefetch fft2d.transpose -trueprob 0.90 -mainclass ${MAINCLASS}
FLAGS1=-dsm -optimize -mainclass ${MAINCLASS}
default:
for file in `ls runlog/*.txt`
do
echo -n $file
- cat $file | awk '{sum += $1} END {print " "sum/NR}'
+ # Average column 1 of each run log, ignoring "Command..." header lines.
+ # NR is 0 when a log is empty or holds only header lines; print 0 instead of letting awk divide by zero.
+ grep -v "^Command" "$file" | awk '{sum += $1} END {print " " (NR ? sum/NR : 0)}'
done