# Build rule fragment. FLAGS1, FLAGS, SRC and MAINCLASS are not defined in the
# visible lines and must be set elsewhere.
# NOTE(review): the recipe lines below carry no leading TAB, and the "- "
# prefixes may be diff removal markers rather than make's ignore-error prefix;
# confirm against the real Makefile.
# default: invokes buildscript twice on ${SRC}, once with ${FLAGS1} (output name
# ${MAINCLASS}NPNC) and once with ${FLAGS} (output name ${MAINCLASS}N); the cp
# lines duplicate the resulting .bin files under numbered names.
default:
../../../../buildscript ${FLAGS1} ${SRC} -o ${MAINCLASS}NPNC
../../../../buildscript ${FLAGS} ${SRC} -o ${MAINCLASS}N
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}1NPNC.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}2NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}2.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}3NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}3.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}4NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}4.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}5NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}5.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}6NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}6.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}7NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}7.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}8NPNC.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}8.bin
# clean: removes the tmpbuilddirectory directory tree.
clean:
rm -rf tmpbuilddirectory
# Name of the main class; also used as the base name of the source file and of
# the build output.
MAINCLASS=Convolution
# Java source file handed to buildscript by the default rule.
SRC=${MAINCLASS}.java
# default: compiles ${SRC} with buildscript into ${MAINCLASS}.
# NOTE(review): the "-"/"+" pair below looks like a diff that swaps the
# -nooptimize invocation for an -optimize -thread one; confirm which is current.
default:
- ../../../../buildscript -nooptimize -debug -mainclass ${MAINCLASS} ${SRC} -o ${MAINCLASS}
+ ../../../../buildscript -optimize -thread -debug -mainclass ${MAINCLASS} ${SRC} -o ${MAINCLASS}
# clean: removes the tmpbuilddirectory directory tree.
clean:
rm -rf tmpbuilddirectory
+++ /dev/null
-//Title: 1-d mixed radix FFT.
-//Version:
-//Copyright: Copyright (c) 1998
-//Author: Dongyan Wang
-//Company: University of Wisconsin-Milwaukee.
-//Description:
-// The number of DFT is factorized.
-//
-// Some short FFTs, such as length 2, 3, 4, 5, 8, 10, are used
-// to improve the speed.
-//
-// Prime factors are processed using DFT. In the future, we can
-// improve this part.
-// Note: there is no limit how large the prime factor can be,
-// because for a set of data of an image, the length can be
-// random, ie. an image can have size 263 x 300, where 263 is
-// a large prime factor.
-//
-// A permute() function is used to make sure FFT can be calculated
-// in place.
-//
-// A twiddle() function is used to perform the FFT.
-//
-// This program is for FFT of complex data, if the input is real,
-// the program can be further improved. Because I want to use the
-// same program to do IFFT, whose input is often complex, so I
-// still use this program.
-//
-// Note: this version stores all data as double. (The original sentence,
-// "double data are used instead of double", presumably described an earlier
-// float variant kept alongside a double version, transforms.fft.)
-//
-// Factorize() is done in constructor, transforms.fft() is needed to be
-// called to do FFT, this is good for use in fft2d, then
-// factorize() is not needed for each row/column of data, since
-// each row/column of a matrix has the same length.
-//
-
-
-public class FFT1d {
- // Maximum numbers of factors allowed.
- //private static final int MaxFactorsNumber = 30;
- private static final int MaxFactorsNumber = 37;
-
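- // cos2to3PI = cos(2*pi/3) - 1 = -1.5, used for the 3-point FFT.
- // (cos(2*PI/3) itself is -0.5; the extra -1 is needed because the butterfly
- // adds this term to temRe[0]/temIm[0] after t1 has already been added to them.)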
- private static final double cos2to3PI = -1.5000f;
- // sin2to3PI = sin(2*pi/3), using for 3 point FFT.
- private static final double sin2to3PI = 8.6602540378444E-01f;
-
- // TwotoFivePI = 2*pi/5.
- // c51, c52, c53, c54, c55 are used in fft5().
- // c51 =(cos(TwotoFivePI)+cos(2*TwotoFivePI))/2-1.
- private static final double c51 = -1.25f;
- // c52 =(cos(TwotoFivePI)-cos(2*TwotoFivePI))/2.
- private static final double c52 = 5.5901699437495E-01f;
- // c53 = -sin(TwotoFivePI).
- private static final double c53 = -9.5105651629515E-01f;
- // c54 =-(sin(TwotoFivePI)+sin(2*TwotoFivePI)).
- private static final double c54 = -1.5388417685876E+00f;
- // c55 =(sin(TwotoFivePI)-sin(2*TwotoFivePI)).
- private static final double c55 = 3.6327126400268E-01f;
-
- // OnetoSqrt2 = 1/sqrt(2), used in fft8().
- private static final double OnetoSqrt2 = 7.0710678118655E-01f;
-
- private static int lastRadix = 0;
-
- int N; // length of N point FFT.
- int NumofFactors; // Number of factors of N.
- static final int maxFactor = 20; // Maximum factor of N.
-
- int factors[]; // Factors of N processed in the current stage.
- int sofar[]; // Finished factors before the current stage.
- int remain[]; // Finished factors after the current stage.
-
- double inputRe[], inputIm[]; // Input of FFT.
- double temRe[], temIm[]; // Intermediate result of FFT.
- double outputRe[], outputIm[]; // Output of FFT.
- //static boolean factorsWerePrinted = false;
- boolean factorsWerePrinted = false;
-
- // Constructor: FFT of Complex data.
- // Builds an N-point complex FFT: allocates the output buffers, factorizes N
- // and allocates the scratch arrays temRe/temIm used by each butterfly.
- // N: transform length; fft() only accepts input arrays of exactly this length.
- public FFT1d(int N) {
-   this.N = N;
-   outputRe = new double[N];
-   outputIm = new double[N];
-
-   factorize();
-   //printFactors();
-
-   // twiddle() and fftPrime() index temRe/temIm up to radix - 1, so the scratch
-   // arrays must hold the largest factor of N. A fixed length of maxFactor
-   // overflowed as soon as N had a prime factor above maxFactor (e.g. N = 263).
-   int scratchLength = maxFactor;
-   for (int i = 0; i < factors.length; i++) {
-     if (factors[i] > scratchLength)
-       scratchLength = factors[i];
-   }
-   temRe = new double[scratchLength];
-   temIm = new double[scratchLength];
- }
-
- public void fft(double inputRe[], double inputIm[]) {
- // First make sure inputRe & inputIm are of the same length.
- if (inputRe.length != N || inputIm.length != N) {
- System.out.println("Error: the length of real part & imaginary part " +
- "of the input to 1-d FFT are different");
- return;
- } else {
- this.inputRe = inputRe;
- this.inputIm = inputIm;
-
- permute();
- //System.out.println("ready to twiddle");
-
- for (int factorIndex = 0; factorIndex < NumofFactors; factorIndex++)
- twiddle(factorIndex);
- //System.out.println("ready to copy");
-
- // Copy the output[] data to input[], so the output can be
- // returned in the input array.
- for (int i = 0; i < N; i++) {
- inputRe[i] = outputRe[i];
- inputIm[i] = outputIm[i];
- }
-
- }
- }
-
- public void printFactors() {
- if (factorsWerePrinted) return;
- factorsWerePrinted = true;
- //System.out.println("factors.length = " + factors.length + "\n");
- for (int i = 0; i < factors.length; i++)
- System.out.println("factors[i] = " + factors[i]);
- }
-
- // Factorizes N and fills NumofFactors, factors[], sofar[] and remain[].
- //   factors[]: factor handled at each stage (stored in reverse order of
- //              discovery).
- //   sofar[]  : product of the factors of all earlier stages.
- //   remain[] : N divided by the factors up to and including this stage.
- // When N has no factor at all (N == 1) the three arrays are left empty and
- // NumofFactors is 0, so fft() runs no butterfly stage. The previous code
- // indexed element 0 of the empty arrays in that case and threw.
- private void factorize() {
-   int radices[] = {2, 3, 4, 5, 8, 10};
-   int temFactors[] = new int[MaxFactorsNumber];
-
-   int n = N;
-   int index = 0; // next free slot in temFactors.
-   int i = radices.length - 1;
-
-   // Pull out the radices that have a dedicated butterfly, largest first.
-   while ((n > 1) && (i >= 0)) {
-     if ((n % radices[i]) == 0) {
-       n /= radices[i];
-       temFactors[index++] = radices[i];
-     } else
-       i--;
-   }
-
-   // Substitute 2x8 with 4x4.
-   // index > 0 guards the case where no radix divided N, such as N = 263.
-   if ((index > 0) && (temFactors[index - 1] == 2))
-     for (i = index - 2; i >= 0; i--)
-       if (temFactors[i] == 8) {
-         temFactors[index - 1] = temFactors[i] = 4;
-         // Only one '2' can exist in temFactors, so a single substitution
-         // is enough.
-         break;
-       }
-
-   // Whatever is left is split by trial division; a remainder above 1 after
-   // the loop is itself a factor.
-   if (n > 1) {
-     for (int k = 2; k < Math.sqrt(n) + 1; k++)
-       while ((n % k) == 0) {
-         n /= k;
-         temFactors[index++] = k;
-       }
-     if (n > 1) {
-       temFactors[index++] = n;
-     }
-   }
-   NumofFactors = index;
-
-   factors = new int[NumofFactors];
-   sofar = new int[NumofFactors];
-   remain = new int[NumofFactors];
-
-   // Nothing to factor (N == 1): leave the arrays empty instead of touching
-   // element 0.
-   if (NumofFactors == 0)
-     return;
-
-   // Reverse temFactors into factors[].
-   for (i = 0; i < NumofFactors; i++) {
-     factors[i] = temFactors[NumofFactors - i - 1];
-   }
-
-   remain[0] = N / factors[0];
-   sofar[0] = 1;
-   for (i = 1; i < NumofFactors; i++) {
-     sofar[i] = sofar[i - 1] * factors[i - 1];
-     remain[i] = remain[i - 1] / factors[i];
-   }
- } // End of function factorize().
-
- // Copies inputRe/inputIm into outputRe/outputIm in permuted order.
- // The source index k is driven by a mixed-radix counter: digit j of count[]
- // has base factors[j], and advancing digit j moves k by remain[j].
- private void permute() {
- int count[] = new int[MaxFactorsNumber];
- int j;
- int k = 0;
-
- for (int i = 0; i < N - 1; i++) {
- outputRe[i] = inputRe[k];
- outputIm[i] = inputIm[k];
- j = 0;
- k = k + remain[j];
- count[0] = count[0] + 1;
- // Carry: a digit that reached its base is reset and the next digit is
- // incremented, with k corrected by the matching strides.
- while (count[j] >= factors[j]) {
- count[j] = 0;
- k = k - (j == 0?N:remain[j - 1]) + remain[j + 1];
- j++;
- count[j] = count[j] + 1;
- }
- }
- // The last element is copied straight across, outside the loop.
- outputRe[N - 1] = inputRe[N - 1];
- outputIm[N - 1] = inputIm[N - 1];
- } // End of function permute().
-
- // Runs stage factorIndex of the transform on outputRe/outputIm.
- // For each of the sofarRadix data offsets and each of the remainRadix groups
- // it gathers radix values spaced sofarRadix apart into temRe/temIm (applying
- // twiddle factors when sofarRadix > 1 and dataNo > 0), runs the butterfly
- // selected by radix, and scatters the result back to the same addresses.
- private void twiddle(int factorIndex) {
- // Get factor data.
- int sofarRadix = sofar[factorIndex];
- int radix = factors[factorIndex];
- int remainRadix = remain[factorIndex];
-
- double tem; // Temporary variable to do data exchange.
-
- // (cosW, sinW) is the step by which (twRe, twIm) is rotated per data offset.
- double W = 2 * (double) Math.PI / (sofarRadix * radix);
- double cosW = (double) Math.cos(W);
- double sinW = -(double) Math.sin(W);
-
- double twiddleRe[] = new double[radix];
- double twiddleIm[] = new double[radix];
- double twRe = 1.0f, twIm = 0f;
-
- //Initialize twiddle addBk.address variables.
- int dataOffset = 0, groupOffset = 0, address = 0;
-
- for (int dataNo = 0; dataNo < sofarRadix; dataNo++) {
- //System.out.println("datano="+dataNo);
- if (sofarRadix > 1) {
- // twiddle[i] = (twRe + i*twIm)^i, built by repeated multiplication.
- twiddleRe[0] = 1.0f;
- twiddleIm[0] = 0.0f;
- twiddleRe[1] = twRe;
- twiddleIm[1] = twIm;
- for (int i = 2; i < radix; i++) {
-
-
- twiddleRe[i] = twRe * twiddleRe[i - 1] - twIm * twiddleIm[i - 1];
- twiddleIm[i] = twIm * twiddleRe[i - 1] + twRe * twiddleIm[i - 1];
- }
- // Advance (twRe, twIm) by one step for the next dataNo.
- tem = cosW * twRe - sinW * twIm;
- twIm = sinW * twRe + cosW * twIm;
- twRe = tem;
- }
- for (int groupNo = 0; groupNo < remainRadix; groupNo++) {
- //System.out.println("groupNo="+groupNo);
- if ((sofarRadix > 1) && (dataNo > 0)) {
- // Gather with twiddle multiplication; element 0 is copied unchanged.
- temRe[0] = outputRe[address];
- temIm[0] = outputIm[address];
- int blockIndex = 1;
- do {
- address = address + sofarRadix;
- temRe[blockIndex] = twiddleRe[blockIndex] * outputRe[address] -
- twiddleIm[blockIndex] * outputIm[address];
- temIm[blockIndex] = twiddleRe[blockIndex] * outputIm[address] +
- twiddleIm[blockIndex] * outputRe[address];
- blockIndex++;
- } while (blockIndex < radix);
- } else
- // Plain gather, no twiddle multiplication.
- for (int i = 0; i < radix; i++) {
- //System.out.println("temRe.length="+temRe.length);
- //System.out.println("i = "+i);
- temRe[i] = outputRe[address];
- temIm[i] = outputIm[address];
- address += sofarRadix;
- }
- //System.out.println("radix="+radix);
- // Butterfly on temRe/temIm[0..radix-1]; radices without a dedicated
- // routine fall through to fftPrime().
- switch (radix) {
- case 2:
- tem = temRe[0] + temRe[1];
- temRe[1] = temRe[0] - temRe[1];
- temRe[0] = tem;
- tem = temIm[0] + temIm[1];
- temIm[1] = temIm[0] - temIm[1];
- temIm[0] = tem;
- break;
- case 3:
- double t1Re = temRe[1] + temRe[2];
- double t1Im = temIm[1] + temIm[2];
- temRe[0] = temRe[0] + t1Re;
- temIm[0] = temIm[0] + t1Im;
-
- // temRe[0]/temIm[0] already include t1 here, which is why cos2to3PI
- // holds -1.5 rather than -0.5.
- double m1Re = cos2to3PI * t1Re;
- double m1Im = cos2to3PI * t1Im;
- double m2Re = sin2to3PI * (temIm[1] - temIm[2]);
- double m2Im = sin2to3PI * (temRe[2] - temRe[1]);
- double s1Re = temRe[0] + m1Re;
- double s1Im = temIm[0] + m1Im;
-
- temRe[1] = s1Re + m2Re;
- temIm[1] = s1Im + m2Im;
- temRe[2] = s1Re - m2Re;
- temIm[2] = s1Im - m2Im;
- break;
- case 4:
- fft4(temRe, temIm);
- break;
- case 5:
- fft5(temRe, temIm);
- break;
- case 8:
- fft8();
- break;
- case 10:
- fft10();
- break;
- default :
- fftPrime(radix);
- break;
- }
- // Scatter the butterfly result back, starting again at the group's base.
- address = groupOffset;
- for (int i = 0; i < radix; i++) {
- outputRe[address] = temRe[i];
- outputIm[address] = temIm[i];
- address += sofarRadix;
- }
- groupOffset += sofarRadix * radix;
- address = groupOffset;
- }
- // Next data offset: restart the group walk one element further in.
- groupOffset = ++dataOffset;
- address = groupOffset;
- }
- } // End of function twiddle().
-
- // The two arguments dataRe[], dataIm[] are mainly for using in fft8();
- // In-place 4-point butterfly on dataRe[0..3] / dataIm[0..3].
- // Takes the arrays as arguments so fft8() can run it on its own sub-arrays.
- private void fft4(double dataRe[], double dataIm[]) {
-   // Sum and difference of elements 0 and 2.
-   double evenSumRe = dataRe[0] + dataRe[2];
-   double evenSumIm = dataIm[0] + dataIm[2];
-   double evenDiffRe = dataRe[0] - dataRe[2];
-   double evenDiffIm = dataIm[0] - dataIm[2];
-
-   // Sum of elements 1 and 3.
-   double oddSumRe = dataRe[1] + dataRe[3];
-   double oddSumIm = dataIm[1] + dataIm[3];
-
-   // (element 1 - element 3) multiplied by -i.
-   double rotRe = dataIm[1] - dataIm[3];
-   double rotIm = dataRe[3] - dataRe[1];
-
-   dataRe[0] = evenSumRe + oddSumRe;
-   dataIm[0] = evenSumIm + oddSumIm;
-   dataRe[1] = evenDiffRe + rotRe;
-   dataIm[1] = evenDiffIm + rotIm;
-   dataRe[2] = evenSumRe - oddSumRe;
-   dataIm[2] = evenSumIm - oddSumIm;
-   dataRe[3] = evenDiffRe - rotRe;
-   dataIm[3] = evenDiffIm - rotIm;
- } // End of function fft4().
-
- // The two arguments dataRe[], dataIm[] are mainly for using in fft10();
- // In-place 5-point butterfly on dataRe[0..4] / dataIm[0..4], using the
- // class constants c51..c55. Takes the arrays as arguments so fft10() can run
- // it on its own sub-arrays.
- private void fft5(double dataRe[], double dataIm[]) {
- double t1Re,t1Im, t2Re,t2Im, t3Re,t3Im, t4Re,t4Im, t5Re,t5Im;
- double m1Re,m1Im, m2Re,m2Im, m3Re,m3Im, m4Re,m4Im, m5Re,m5Im;
- double s1Re,s1Im, s2Re,s2Im, s3Re,s3Im, s4Re,s4Im, s5Re,s5Im;
-
- // Sums and differences of the pairs (1,4) and (2,3); t5 is the sum of
- // elements 1..4.
- t1Re = dataRe[1] + dataRe[4];
- t1Im = dataIm[1] + dataIm[4];
- t2Re = dataRe[2] + dataRe[3];
- t2Im = dataIm[2] + dataIm[3];
- t3Re = dataRe[1] - dataRe[4];
- t3Im = dataIm[1] - dataIm[4];
- t4Re = dataRe[3] - dataRe[2];
- t4Im = dataIm[3] - dataIm[2];
- t5Re = t1Re + t2Re;
- t5Im = t1Im + t2Im;
-
- // Output 0 is the sum of all five inputs.
- dataRe[0] = dataRe[0] + t5Re;
- dataIm[0] = dataIm[0] + t5Im;
-
- // Products with the constants; m3..m5 swap real and imaginary parts.
- m1Re = c51 * t5Re;
- m1Im = c51 * t5Im;
- m2Re = c52 * (t1Re - t2Re);
- m2Im = c52 * (t1Im - t2Im);
- m3Re = -c53 * (t3Im + t4Im);
- m3Im = c53 * (t3Re + t4Re);
- m4Re = -c54 * t4Im;
- m4Im = c54 * t4Re;
- m5Re = -c55 * t3Im;
- m5Im = c55 * t3Re;
-
- // s1 uses the already-updated dataRe[0]/dataIm[0].
- s3Re = m3Re - m4Re;
- s3Im = m3Im - m4Im;
- s5Re = m3Re + m5Re;
- s5Im = m3Im + m5Im;
- s1Re = dataRe[0] + m1Re;
- s1Im = dataIm[0] + m1Im;
- s2Re = s1Re + m2Re;
- s2Im = s1Im + m2Im;
- s4Re = s1Re - m2Re;
- s4Im = s1Im - m2Im;
-
- dataRe[1] = s2Re + s3Re;
- dataIm[1] = s2Im + s3Im;
- dataRe[2] = s4Re + s5Re;
- dataIm[2] = s4Im + s5Im;
- dataRe[3] = s4Re - s5Re;
- dataIm[3] = s4Im - s5Im;
- dataRe[4] = s2Re - s3Re;
- dataIm[4] = s2Im - s3Im;
- } // End of function fft5().
-
- // In-place 8-point butterfly on temRe[0..7] / temIm[0..7].
- // Splits the data into even-indexed (data1) and odd-indexed (data2) halves,
- // runs fft4() on each, rotates data2[1..3], and writes the sums to
- // tem[0..3] and the differences to tem[4..7].
- private void fft8() {
- double data1Re[] = new double[4];
- double data1Im[] = new double[4];
- double data2Re[] = new double[4];
- double data2Im[] = new double[4];
- double tem;
-
- // To improve the speed, use direct assignment instead of a for loop here.
- data1Re[0] = temRe[0];
- data2Re[0] = temRe[1];
- data1Re[1] = temRe[2];
- data2Re[1] = temRe[3];
- data1Re[2] = temRe[4];
- data2Re[2] = temRe[5];
- data1Re[3] = temRe[6];
- data2Re[3] = temRe[7];
-
- data1Im[0] = temIm[0];
- data2Im[0] = temIm[1];
- data1Im[1] = temIm[2];
- data2Im[1] = temIm[3];
- data1Im[2] = temIm[4];
- data2Im[2] = temIm[5];
- data1Im[3] = temIm[6];
- data2Im[3] = temIm[7];
-
- fft4(data1Re, data1Im);
- fft4(data2Re, data2Im);
-
- // Rotate data2[1], data2[2], data2[3]; data2[0] is left as is.
- tem = OnetoSqrt2 * (data2Re[1] + data2Im[1]);
- data2Im[1] = OnetoSqrt2 * (data2Im[1] - data2Re[1]);
- data2Re[1] = tem;
- tem = data2Im[2];
- data2Im[2] = -data2Re[2];
- data2Re[2] = tem;
- tem = OnetoSqrt2 * (data2Im[3] - data2Re[3]);
- data2Im[3] = -OnetoSqrt2 * (data2Re[3] + data2Im[3]);
- data2Re[3] = tem;
-
- // Combine: tem[k] = data1[k] + data2[k], tem[k + 4] = data1[k] - data2[k].
- temRe[0] = data1Re[0] + data2Re[0];
- temRe[4] = data1Re[0] - data2Re[0];
- temRe[1] = data1Re[1] + data2Re[1];
- temRe[5] = data1Re[1] - data2Re[1];
- temRe[2] = data1Re[2] + data2Re[2];
- temRe[6] = data1Re[2] - data2Re[2];
- temRe[3] = data1Re[3] + data2Re[3];
- temRe[7] = data1Re[3] - data2Re[3];
-
- temIm[0] = data1Im[0] + data2Im[0];
- temIm[4] = data1Im[0] - data2Im[0];
- temIm[1] = data1Im[1] + data2Im[1];
- temIm[5] = data1Im[1] - data2Im[1];
- temIm[2] = data1Im[2] + data2Im[2];
- temIm[6] = data1Im[2] - data2Im[2];
- temIm[3] = data1Im[3] + data2Im[3];
- temIm[7] = data1Im[3] - data2Im[3];
- } // End of function fft8().
-
- // In-place 10-point butterfly on temRe[0..9] / temIm[0..9].
- // data1 takes elements 0, 2, 4, 6, 8 and data2 takes elements 5, 7, 9, 1, 3;
- // fft5() runs on each, and the sums and differences are written back in the
- // index order shown below.
- private void fft10() {
- double data1Re[] = new double[5];
- double data1Im[] = new double[5];
- double data2Re[] = new double[5];
- double data2Im[] = new double[5];
-
- // To improve the speed, use direct assignment instead of a for loop here.
- data1Re[0] = temRe[0];
- data2Re[0] = temRe[5];
- data1Re[1] = temRe[2];
- data2Re[1] = temRe[7];
- data1Re[2] = temRe[4];
- data2Re[2] = temRe[9];
- data1Re[3] = temRe[6];
- data2Re[3] = temRe[1];
- data1Re[4] = temRe[8];
- data2Re[4] = temRe[3];
- data1Im[0] = temIm[0];
- data2Im[0] = temIm[5];
- data1Im[1] = temIm[2];
- data2Im[1] = temIm[7];
- data1Im[2] = temIm[4];
- data2Im[2] = temIm[9];
- data1Im[3] = temIm[6];
- data2Im[3] = temIm[1];
- data1Im[4] = temIm[8];
- data2Im[4] = temIm[3];
-
- fft5(data1Re, data1Im);
- fft5(data2Re, data2Im);
-
- // Sum goes to one slot and difference to the slot five away (mod 10);
- // which of the two receives the sum alternates with k.
- temRe[0] = data1Re[0] + data2Re[0];
- temRe[5] = data1Re[0] - data2Re[0];
- temRe[6] = data1Re[1] + data2Re[1];
- temRe[1] = data1Re[1] - data2Re[1];
- temRe[2] = data1Re[2] + data2Re[2];
- temRe[7] = data1Re[2] - data2Re[2];
- temRe[8] = data1Re[3] + data2Re[3];
- temRe[3] = data1Re[3] - data2Re[3];
- temRe[4] = data1Re[4] + data2Re[4];
- temRe[9] = data1Re[4] - data2Re[4];
-
- temIm[0] = data1Im[0] + data2Im[0];
- temIm[5] = data1Im[0] - data2Im[0];
- temIm[6] = data1Im[1] + data2Im[1];
- temIm[1] = data1Im[1] - data2Im[1];
- temIm[2] = data1Im[2] + data2Im[2];
- temIm[7] = data1Im[2] - data2Im[2];
- temIm[8] = data1Im[3] + data2Im[3];
- temIm[3] = data1Im[3] - data2Im[3];
- temIm[4] = data1Im[4] + data2Im[4];
- temIm[9] = data1Im[4] - data2Im[4];
- } // End of function fft10().
-
- public double sqrt(double d) {
- return Math.sqrt(d);
- }
-
- // In-place radix-point DFT on temRe/temIm[0..radix-1], used by twiddle() for
- // any radix without a dedicated butterfly. Works on the pairs (j, radix - j),
- // so each inner product is shared by two outputs.
- private void fftPrime(int radix) {
- // Initial WRe, WIm: W[i] is built from W[i - 1] by one rotation of
- // (cosW, sinW), starting from W[0] = 1.
- double W = 2 * (double) Math.PI / radix;
- double cosW = (double) Math.cos(W);
- double sinW = -(double) Math.sin(W);
- double WRe[] = new double[radix];
- double WIm[] = new double[radix];
-
- WRe[0] = 1;
- WIm[0] = 0;
- WRe[1] = cosW;
- WIm[1] = sinW;
-
- for (int i = 2; i < radix; i++) {
- WRe[i] = cosW * WRe[i - 1] - sinW * WIm[i - 1];
- WIm[i] = sinW * WRe[i - 1] + cosW * WIm[i - 1];
- }
-
- // FFT of prime length data, using DFT, can be improved in the future.
- double rere, reim, imre, imim;
- int j, k;
- int max = (radix + 1) / 2;
-
- double tem1Re[] = new double[max];
- double tem1Im[] = new double[max];
- double tem2Re[] = new double[max];
- double tem2Im[] = new double[max];
-
- // Sums and differences of each input pair (j, radix - j).
- for (j = 1; j < max; j++) {
- tem1Re[j] = temRe[j] + temRe[radix - j];
- tem1Im[j] = temIm[j] - temIm[radix - j];
- tem2Re[j] = temRe[j] - temRe[radix - j];
- tem2Im[j] = temIm[j] + temIm[radix - j];
- }
-
- // Outputs j and radix - j both start from input 0 and accumulate the same
- // four products with opposite signs on the imim / imre terms.
- for (j = 1; j < max; j++) {
- temRe[j] = temRe[0];
- temIm[j] = temIm[0];
- temRe[radix - j] = temRe[0];
- temIm[radix - j] = temIm[0];
- k = j;
- for (int i = 1; i < max; i++) {
- rere = WRe[k] * tem1Re[i];
- imim = WIm[k] * tem1Im[i];
- reim = WRe[k] * tem2Im[i];
- imre = WIm[k] * tem2Re[i];
-
- temRe[radix - j] += rere + imim;
- temIm[radix - j] += reim - imre;
- temRe[j] += rere - imim;
- temIm[j] += reim + imre;
-
- // k steps through the multiples of j modulo radix.
- k = k + j;
- if (k >= radix)
- k = k - radix;
- }
- }
- // Output 0 is updated last because the loop above still reads the original
- // temRe[0] / temIm[0].
- for (j = 1; j < max; j++) {
- temRe[0] = temRe[0] + tem1Re[j];
- temIm[0] = temIm[0] + tem2Im[j];
- }
- } // End of function fftPrime().
-
-} // End of class FFT1d
+++ /dev/null
-//Title: 2-d mixed radix FFT.
-//Version:
-//Copyright: Copyright (c) 1998
-//Author: Dongyan Wang
-//Company: University of Wisconsin-Milwaukee.
-//Description:
-// . Use FFT1d to perform FFT2d.
-//
-
-public class FFT2d {
- //
- // Input of FFT, 2-d matrix.
- double dataRe[][], dataIm[][];
-
- // Width and height of 2-d matrix inputRe or inputIm.
- int width, height;
-
- // Constructor: 2-d FFT of Complex data.
- // Runs a 2-d FFT over a flat, row-major matrix and writes the result back
- // into the same arrays.
- // inputRe / inputIm: real and imaginary parts; if their lengths differ an
- //                    error line is printed and nothing else happens.
- // inputWidth: number of columns; the row count is inputRe.length / inputWidth.
- public FFT2d(double inputRe[], double inputIm[], int inputWidth) {
-   if (inputRe.length != inputIm.length) {
-     System.out.println("Error: the length of real part & imaginary part " +
-                        "of the input to 2-d FFT are different");
-     return;
-   }
-
-   width = inputWidth;
-   height = inputRe.length / width;
-   dataRe = new double[height][width];
-   dataIm = new double[height][width];
-
-   // Unpack the flat input into the height x width matrices.
-   for (int row = 0; row < height; row++) {
-     for (int col = 0; col < width; col++) {
-       dataRe[row][col] = inputRe[row * width + col];
-       dataIm[row][col] = inputIm[row * width + col];
-     }
-   }
-
-   // 1-d FFT of every row, in place.
-   FFT1d rowFft = new FFT1d(width);
-   for (int row = 0; row < height; row++)
-     rowFft.fft(dataRe[row], dataIm[row]);
-
-   // Transpose so that each column becomes a contiguous array, then run the
-   // 1-d FFT of every column.
-   double colRe[][] = transpose(dataRe);
-   double colIm[][] = transpose(dataIm);
-
-   FFT1d colFft = new FFT1d(height);
-   for (int col = 0; col < width; col++)
-     colFft.fft(colRe[col], colIm[col]);
-
-   // Transpose back while copying the result into the caller's arrays.
-   for (int row = 0; row < height; row++) {
-     for (int col = 0; col < width; col++) {
-       inputRe[row * width + col] = colRe[col][row];
-       inputIm[row * width + col] = colIm[col][row];
-     }
-   }
- }
-
- // Transpose matrix input.
- // Returns a new width x height matrix holding the transpose of input, which
- // is read as height rows of width columns.
- private double[][] transpose(double[][] input) {
-   double[][] flipped = new double[width][height];
-
-   for (int row = 0; row < height; row++) {
-     for (int col = 0; col < width; col++) {
-       flipped[col][row] = input[row][col];
-     }
-   }
-
-   return flipped;
- } // End of function transpose().
-
-
- // Command-line driver.
- //   args[0] (optional): thread count; it is only echoed in the banner.
- //   args[1] (optional): SIZE, the number of complex samples (default 800).
- // Fills both input arrays with 0..SIZE-1 and runs the 2-d FFT with a fixed
- // width of 10 columns.
- public static void main(String[] args) {
-   int NUM_THREADS = 1;
-   int SIZE = 800;
-   int inputWidth = 10;
-   if (args.length > 0) {
-     NUM_THREADS = Integer.parseInt(args[0]);
-     if (args.length > 1)
-       SIZE = Integer.parseInt(args[1]);
-   }
-
-   System.out.println("Num threads = " + NUM_THREADS + " SIZE= " + SIZE + "\n");
-
-   // Initialize the input: real and imaginary parts both hold the index.
-   double[] inputRe = new double[SIZE];
-   double[] inputIm = new double[SIZE];
-   for (int i = 0; i < SIZE; i++) {
-     inputRe[i] = i;
-     inputIm[i] = i;
-   }
-
-   // The FFT2d constructor performs the whole transform and writes the result
-   // back into inputRe / inputIm.
-   FFT2d myfft2d = new FFT2d(inputRe, inputIm, inputWidth);
-
-   System.out.println("2DFFT done! \n");
- }
-}
--- /dev/null
+//Title: 1-d mixed radix FFT.
+//Version:
+//Copyright: Copyright (c) 1998
+//Author: Dongyan Wang
+//Company: University of Wisconsin-Milwaukee.
+//Description:
+// The number of DFT is factorized.
+//
+// Some short FFTs, such as length 2, 3, 4, 5, 8, 10, are used
+// to improve the speed.
+//
+// Prime factors are processed using DFT. In the future, we can
+// improve this part.
+// Note: there is no limit how large the prime factor can be,
+// because for a set of data of an image, the length can be
+// random, ie. an image can have size 263 x 300, where 263 is
+// a large prime factor.
+//
+// A permute() function is used to make sure FFT can be calculated
+// in place.
+//
+// A twiddle() function is used to perform the FFT.
+//
+// This program is for FFT of complex data, if the input is real,
+// the program can be further improved. Because I want to use the
+// same program to do IFFT, whose input is often complex, so I
+// still use this program.
+//
+// Note: this version stores all data as double. (The original sentence,
+// "double data are used instead of double", presumably described an earlier
+// float variant kept alongside a double version, transforms.fft.)
+//
+// Factorize() is done in constructor, transforms.fft() is needed to be
+// called to do FFT, this is good for use in fft2d, then
+// factorize() is not needed for each row/column of data, since
+// each row/column of a matrix has the same length.
+//
+
+
+public class FFT1d {
+ // Maximum numbers of factors allowed.
+ //private static final int MaxFactorsNumber = 30;
+ private static final int MaxFactorsNumber = 37;
+
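+ // cos2to3PI = cos(2*pi/3) - 1 = -1.5, used for the 3-point FFT.
+ // (cos(2*PI/3) itself is -0.5; the extra -1 is needed because the butterfly
+ // adds this term to temRe[0]/temIm[0] after t1 has already been added to them.)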
+ private static final double cos2to3PI = -1.5000f;
+ // sin2to3PI = sin(2*pi/3), using for 3 point FFT.
+ private static final double sin2to3PI = 8.6602540378444E-01f;
+
+ // TwotoFivePI = 2*pi/5.
+ // c51, c52, c53, c54, c55 are used in fft5().
+ // c51 =(cos(TwotoFivePI)+cos(2*TwotoFivePI))/2-1.
+ private static final double c51 = -1.25f;
+ // c52 =(cos(TwotoFivePI)-cos(2*TwotoFivePI))/2.
+ private static final double c52 = 5.5901699437495E-01f;
+ // c53 = -sin(TwotoFivePI).
+ private static final double c53 = -9.5105651629515E-01f;
+ // c54 =-(sin(TwotoFivePI)+sin(2*TwotoFivePI)).
+ private static final double c54 = -1.5388417685876E+00f;
+ // c55 =(sin(TwotoFivePI)-sin(2*TwotoFivePI)).
+ private static final double c55 = 3.6327126400268E-01f;
+
+ // OnetoSqrt2 = 1/sqrt(2), used in fft8().
+ private static final double OnetoSqrt2 = 7.0710678118655E-01f;
+
+ private static int lastRadix = 0;
+
+ int N; // length of N point FFT.
+ int NumofFactors; // Number of factors of N.
+ static final int maxFactor = 20; // Maximum factor of N.
+
+ int factors[]; // Factors of N processed in the current stage.
+ int sofar[]; // Finished factors before the current stage.
+ int remain[]; // Finished factors after the current stage.
+
+ double inputRe[], inputIm[]; // Input of FFT.
+ double temRe[], temIm[]; // Intermediate result of FFT.
+ double outputRe[], outputIm[]; // Output of FFT.
+ //static boolean factorsWerePrinted = false;
+ boolean factorsWerePrinted = false;
+
+ // Constructor: FFT of Complex data.
+ // Builds an N-point complex FFT: allocates the output buffers, factorizes N
+ // and allocates the scratch arrays temRe/temIm used by each butterfly.
+ // N: transform length; fft() only accepts input arrays of exactly this length.
+ public FFT1d(int N) {
+   this.N = N;
+   outputRe = new double[N];
+   outputIm = new double[N];
+
+   factorize();
+   //printFactors();
+
+   // twiddle() and fftPrime() index temRe/temIm up to radix - 1, so the scratch
+   // arrays must hold the largest factor of N. A fixed length of maxFactor
+   // overflowed as soon as N had a prime factor above maxFactor (e.g. N = 263).
+   int scratchLength = maxFactor;
+   for (int i = 0; i < factors.length; i++) {
+     if (factors[i] > scratchLength)
+       scratchLength = factors[i];
+   }
+   temRe = new double[scratchLength];
+   temIm = new double[scratchLength];
+ }
+
+ public void fft(double inputRe[], double inputIm[]) {
+ // First make sure inputRe & inputIm are of the same length.
+ if (inputRe.length != N || inputIm.length != N) {
+ System.out.println("Error: the length of real part & imaginary part " +
+ "of the input to 1-d FFT are different");
+ return;
+ } else {
+ this.inputRe = inputRe;
+ this.inputIm = inputIm;
+
+ permute();
+ //System.out.println("ready to twiddle");
+
+ for (int factorIndex = 0; factorIndex < NumofFactors; factorIndex++)
+ twiddle(factorIndex);
+ //System.out.println("ready to copy");
+
+ // Copy the output[] data to input[], so the output can be
+ // returned in the input array.
+ for (int i = 0; i < N; i++) {
+ inputRe[i] = outputRe[i];
+ inputIm[i] = outputIm[i];
+ }
+
+ }
+ }
+
+ public void printFactors() {
+ if (factorsWerePrinted) return;
+ factorsWerePrinted = true;
+ //System.out.println("factors.length = " + factors.length + "\n");
+ for (int i = 0; i < factors.length; i++)
+ System.out.println("factors[i] = " + factors[i]);
+ }
+
+ // Factorizes N and fills NumofFactors, factors[], sofar[] and remain[].
+ //   factors[]: factor handled at each stage (stored in reverse order of
+ //              discovery).
+ //   sofar[]  : product of the factors of all earlier stages.
+ //   remain[] : N divided by the factors up to and including this stage.
+ // When N has no factor at all (N == 1) the three arrays are left empty and
+ // NumofFactors is 0, so fft() runs no butterfly stage. The previous code
+ // indexed element 0 of the empty arrays in that case and threw.
+ private void factorize() {
+   int radices[] = {2, 3, 4, 5, 8, 10};
+   int temFactors[] = new int[MaxFactorsNumber];
+
+   int n = N;
+   int index = 0; // next free slot in temFactors.
+   int i = radices.length - 1;
+
+   // Pull out the radices that have a dedicated butterfly, largest first.
+   while ((n > 1) && (i >= 0)) {
+     if ((n % radices[i]) == 0) {
+       n /= radices[i];
+       temFactors[index++] = radices[i];
+     } else
+       i--;
+   }
+
+   // Substitute 2x8 with 4x4.
+   // index > 0 guards the case where no radix divided N, such as N = 263.
+   if ((index > 0) && (temFactors[index - 1] == 2))
+     for (i = index - 2; i >= 0; i--)
+       if (temFactors[i] == 8) {
+         temFactors[index - 1] = temFactors[i] = 4;
+         // Only one '2' can exist in temFactors, so a single substitution
+         // is enough.
+         break;
+       }
+
+   // Whatever is left is split by trial division; a remainder above 1 after
+   // the loop is itself a factor.
+   if (n > 1) {
+     for (int k = 2; k < Math.sqrt(n) + 1; k++)
+       while ((n % k) == 0) {
+         n /= k;
+         temFactors[index++] = k;
+       }
+     if (n > 1) {
+       temFactors[index++] = n;
+     }
+   }
+   NumofFactors = index;
+
+   factors = new int[NumofFactors];
+   sofar = new int[NumofFactors];
+   remain = new int[NumofFactors];
+
+   // Nothing to factor (N == 1): leave the arrays empty instead of touching
+   // element 0.
+   if (NumofFactors == 0)
+     return;
+
+   // Reverse temFactors into factors[].
+   for (i = 0; i < NumofFactors; i++) {
+     factors[i] = temFactors[NumofFactors - i - 1];
+   }
+
+   remain[0] = N / factors[0];
+   sofar[0] = 1;
+   for (i = 1; i < NumofFactors; i++) {
+     sofar[i] = sofar[i - 1] * factors[i - 1];
+     remain[i] = remain[i - 1] / factors[i];
+   }
+ } // End of function factorize().
+
+ // Copies inputRe/inputIm into outputRe/outputIm in permuted order.
+ // The source index k is driven by a mixed-radix counter: digit j of count[]
+ // has base factors[j], and advancing digit j moves k by remain[j].
+ private void permute() {
+ int count[] = new int[MaxFactorsNumber];
+ int j;
+ int k = 0;
+
+ for (int i = 0; i < N - 1; i++) {
+ outputRe[i] = inputRe[k];
+ outputIm[i] = inputIm[k];
+ j = 0;
+ k = k + remain[j];
+ count[0] = count[0] + 1;
+ // Carry: a digit that reached its base is reset and the next digit is
+ // incremented, with k corrected by the matching strides.
+ while (count[j] >= factors[j]) {
+ count[j] = 0;
+ k = k - (j == 0?N:remain[j - 1]) + remain[j + 1];
+ j++;
+ count[j] = count[j] + 1;
+ }
+ }
+ // The last element is copied straight across, outside the loop.
+ outputRe[N - 1] = inputRe[N - 1];
+ outputIm[N - 1] = inputIm[N - 1];
+ } // End of function permute().
+
+ // Runs stage factorIndex of the transform on outputRe/outputIm.
+ // For each of the sofarRadix data offsets and each of the remainRadix groups
+ // it gathers radix values spaced sofarRadix apart into temRe/temIm (applying
+ // twiddle factors when sofarRadix > 1 and dataNo > 0), runs the butterfly
+ // selected by radix, and scatters the result back to the same addresses.
+ private void twiddle(int factorIndex) {
+ // Get factor data.
+ int sofarRadix = sofar[factorIndex];
+ int radix = factors[factorIndex];
+ int remainRadix = remain[factorIndex];
+
+ double tem; // Temporary variable to do data exchange.
+
+ // (cosW, sinW) is the step by which (twRe, twIm) is rotated per data offset.
+ double W = 2 * (double) Math.PI / (sofarRadix * radix);
+ double cosW = (double) Math.cos(W);
+ double sinW = -(double) Math.sin(W);
+
+ double twiddleRe[] = new double[radix];
+ double twiddleIm[] = new double[radix];
+ double twRe = 1.0f, twIm = 0f;
+
+ //Initialize twiddle addBk.address variables.
+ int dataOffset = 0, groupOffset = 0, address = 0;
+
+ for (int dataNo = 0; dataNo < sofarRadix; dataNo++) {
+ //System.out.println("datano="+dataNo);
+ if (sofarRadix > 1) {
+ // twiddle[i] = (twRe + i*twIm)^i, built by repeated multiplication.
+ twiddleRe[0] = 1.0f;
+ twiddleIm[0] = 0.0f;
+ twiddleRe[1] = twRe;
+ twiddleIm[1] = twIm;
+ for (int i = 2; i < radix; i++) {
+
+
+ twiddleRe[i] = twRe * twiddleRe[i - 1] - twIm * twiddleIm[i - 1];
+ twiddleIm[i] = twIm * twiddleRe[i - 1] + twRe * twiddleIm[i - 1];
+ }
+ // Advance (twRe, twIm) by one step for the next dataNo.
+ tem = cosW * twRe - sinW * twIm;
+ twIm = sinW * twRe + cosW * twIm;
+ twRe = tem;
+ }
+ for (int groupNo = 0; groupNo < remainRadix; groupNo++) {
+ //System.out.println("groupNo="+groupNo);
+ if ((sofarRadix > 1) && (dataNo > 0)) {
+ // Gather with twiddle multiplication; element 0 is copied unchanged.
+ temRe[0] = outputRe[address];
+ temIm[0] = outputIm[address];
+ int blockIndex = 1;
+ do {
+ address = address + sofarRadix;
+ temRe[blockIndex] = twiddleRe[blockIndex] * outputRe[address] -
+ twiddleIm[blockIndex] * outputIm[address];
+ temIm[blockIndex] = twiddleRe[blockIndex] * outputIm[address] +
+ twiddleIm[blockIndex] * outputRe[address];
+ blockIndex++;
+ } while (blockIndex < radix);
+ } else
+ // Plain gather, no twiddle multiplication.
+ for (int i = 0; i < radix; i++) {
+ //System.out.println("temRe.length="+temRe.length);
+ //System.out.println("i = "+i);
+ temRe[i] = outputRe[address];
+ temIm[i] = outputIm[address];
+ address += sofarRadix;
+ }
+ //System.out.println("radix="+radix);
+ // Butterfly on temRe/temIm[0..radix-1]; radices without a dedicated
+ // routine fall through to fftPrime().
+ switch (radix) {
+ case 2:
+ tem = temRe[0] + temRe[1];
+ temRe[1] = temRe[0] - temRe[1];
+ temRe[0] = tem;
+ tem = temIm[0] + temIm[1];
+ temIm[1] = temIm[0] - temIm[1];
+ temIm[0] = tem;
+ break;
+ case 3:
+ double t1Re = temRe[1] + temRe[2];
+ double t1Im = temIm[1] + temIm[2];
+ temRe[0] = temRe[0] + t1Re;
+ temIm[0] = temIm[0] + t1Im;
+
+ // temRe[0]/temIm[0] already include t1 here, which is why cos2to3PI
+ // holds -1.5 rather than -0.5.
+ double m1Re = cos2to3PI * t1Re;
+ double m1Im = cos2to3PI * t1Im;
+ double m2Re = sin2to3PI * (temIm[1] - temIm[2]);
+ double m2Im = sin2to3PI * (temRe[2] - temRe[1]);
+ double s1Re = temRe[0] + m1Re;
+ double s1Im = temIm[0] + m1Im;
+
+ temRe[1] = s1Re + m2Re;
+ temIm[1] = s1Im + m2Im;
+ temRe[2] = s1Re - m2Re;
+ temIm[2] = s1Im - m2Im;
+ break;
+ case 4:
+ fft4(temRe, temIm);
+ break;
+ case 5:
+ fft5(temRe, temIm);
+ break;
+ case 8:
+ fft8();
+ break;
+ case 10:
+ fft10();
+ break;
+ default :
+ fftPrime(radix);
+ break;
+ }
+ // Scatter the butterfly result back, starting again at the group's base.
+ address = groupOffset;
+ for (int i = 0; i < radix; i++) {
+ outputRe[address] = temRe[i];
+ outputIm[address] = temIm[i];
+ address += sofarRadix;
+ }
+ groupOffset += sofarRadix * radix;
+ address = groupOffset;
+ }
+ // Next data offset: restart the group walk one element further in.
+ groupOffset = ++dataOffset;
+ address = groupOffset;
+ }
+ } // End of function twiddle().
+
+  // Length-4 butterfly, computed in place on dataRe[0..3] / dataIm[0..3].
+  // The arrays are parameters so that fft8() can run this on its two
+  // half-length sub-sequences as well as on the shared scratch arrays.
+  private void fft4(double dataRe[], double dataIm[]) {
+    // Sums of the even-indexed pair (0,2) and of the odd-indexed pair (1,3).
+    double evenSumRe = dataRe[0] + dataRe[2];
+    double evenSumIm = dataIm[0] + dataIm[2];
+    double oddSumRe = dataRe[1] + dataRe[3];
+    double oddSumIm = dataIm[1] + dataIm[3];
+
+    // Difference of the even pair, and the odd-pair difference (x1 - x3)
+    // multiplied by -i: real part im1 - im3, imaginary part re3 - re1.
+    double evenDiffRe = dataRe[0] - dataRe[2];
+    double evenDiffIm = dataIm[0] - dataIm[2];
+    double oddRotRe = dataIm[1] - dataIm[3];
+    double oddRotIm = dataRe[3] - dataRe[1];
+
+    // Every input has been read above, so the outputs can be stored in place.
+    dataRe[0] = evenSumRe + oddSumRe;
+    dataIm[0] = evenSumIm + oddSumIm;
+    dataRe[1] = evenDiffRe + oddRotRe;
+    dataIm[1] = evenDiffIm + oddRotIm;
+    dataRe[2] = evenSumRe - oddSumRe;
+    dataIm[2] = evenSumIm - oddSumIm;
+    dataRe[3] = evenDiffRe - oddRotRe;
+    dataIm[3] = evenDiffIm - oddRotIm;
+  } // End of function fft4().
+
+ // Length-5 butterfly, computed in place on dataRe[0..4] / dataIm[0..4].
+ // The arrays are parameters (rather than the shared scratch arrays) so that
+ // fft10() can call this on its two half-length sub-sequences.
+ // Uses the constants c51..c55, which are defined outside this method.
+ private void fft5(double dataRe[], double dataIm[]) {
+ double t1Re,t1Im, t2Re,t2Im, t3Re,t3Im, t4Re,t4Im, t5Re,t5Im;
+ double m1Re,m1Im, m2Re,m2Im, m3Re,m3Im, m4Re,m4Im, m5Re,m5Im;
+ double s1Re,s1Im, s2Re,s2Im, s3Re,s3Im, s4Re,s4Im, s5Re,s5Im;
+
+ // Sums and differences of the mirrored input pairs (1,4) and (2,3).
+ // Note that t4 is taken as element 3 minus element 2.
+ t1Re = dataRe[1] + dataRe[4];
+ t1Im = dataIm[1] + dataIm[4];
+ t2Re = dataRe[2] + dataRe[3];
+ t2Im = dataIm[2] + dataIm[3];
+ t3Re = dataRe[1] - dataRe[4];
+ t3Im = dataIm[1] - dataIm[4];
+ t4Re = dataRe[3] - dataRe[2];
+ t4Im = dataIm[3] - dataIm[2];
+ // t5 is the sum of inputs 1..4.
+ t5Re = t1Re + t2Re;
+ t5Im = t1Im + t2Im;
+
+ // Output 0 is the sum of all five inputs. It is stored immediately, and
+ // this updated value (not the original input 0) is what s1 reads below.
+ dataRe[0] = dataRe[0] + t5Re;
+ dataIm[0] = dataIm[0] + t5Im;
+
+ // Scale by the constants. m3..m5 also swap the real and imaginary parts,
+ // negating the new real part.
+ m1Re = c51 * t5Re;
+ m1Im = c51 * t5Im;
+ m2Re = c52 * (t1Re - t2Re);
+ m2Im = c52 * (t1Im - t2Im);
+ m3Re = -c53 * (t3Im + t4Im);
+ m3Im = c53 * (t3Re + t4Re);
+ m4Re = -c54 * t4Im;
+ m4Im = c54 * t4Re;
+ m5Re = -c55 * t3Im;
+ m5Im = c55 * t3Re;
+
+ // Intermediate terms shared by the output pairs (1,4) and (2,3).
+ s3Re = m3Re - m4Re;
+ s3Im = m3Im - m4Im;
+ s5Re = m3Re + m5Re;
+ s5Im = m3Im + m5Im;
+ s1Re = dataRe[0] + m1Re;
+ s1Im = dataIm[0] + m1Im;
+ s2Re = s1Re + m2Re;
+ s2Im = s1Im + m2Im;
+ s4Re = s1Re - m2Re;
+ s4Im = s1Im - m2Im;
+
+ // Outputs 1 and 4 are s2 +/- s3; outputs 2 and 3 are s4 +/- s5.
+ dataRe[1] = s2Re + s3Re;
+ dataIm[1] = s2Im + s3Im;
+ dataRe[2] = s4Re + s5Re;
+ dataIm[2] = s4Im + s5Im;
+ dataRe[3] = s4Re - s5Re;
+ dataIm[3] = s4Im - s5Im;
+ dataRe[4] = s2Re - s3Re;
+ dataIm[4] = s2Im - s3Im;
+ } // End of function fft5().
+
+ // Length-8 transform, computed in place on temRe[0..7] / temIm[0..7]
+ // (scratch arrays declared outside this method). The input is split into
+ // its even- and odd-indexed halves, fft4() is run on each half, the odd
+ // half is rotated, and the halves are combined with sums and differences.
+ private void fft8() {
+ double data1Re[] = new double[4];
+ double data1Im[] = new double[4];
+ double data2Re[] = new double[4];
+ double data2Im[] = new double[4];
+ double tem;
+
+ // To improve the speed, use direct assignment instead of a for loop here.
+ // data1 takes the even-indexed elements (0,2,4,6), data2 the odd ones
+ // (1,3,5,7).
+ data1Re[0] = temRe[0];
+ data2Re[0] = temRe[1];
+ data1Re[1] = temRe[2];
+ data2Re[1] = temRe[3];
+ data1Re[2] = temRe[4];
+ data2Re[2] = temRe[5];
+ data1Re[3] = temRe[6];
+ data2Re[3] = temRe[7];
+
+ data1Im[0] = temIm[0];
+ data2Im[0] = temIm[1];
+ data1Im[1] = temIm[2];
+ data2Im[1] = temIm[3];
+ data1Im[2] = temIm[4];
+ data2Im[2] = temIm[5];
+ data1Im[3] = temIm[6];
+ data2Im[3] = temIm[7];
+
+ fft4(data1Re, data1Im);
+ fft4(data2Re, data2Im);
+
+ // Rotate elements 1..3 of the odd half; element 0 is left unchanged.
+ // 'tem' holds the new real part until the new imaginary part has been
+ // computed from the old real part.
+ // OnetoSqrt2 is defined outside this method (presumably 1/sqrt(2) - confirm).
+ // Element 1: (re, im) -> OnetoSqrt2 * (re + im, im - re).
+ tem = OnetoSqrt2 * (data2Re[1] + data2Im[1]);
+ data2Im[1] = OnetoSqrt2 * (data2Im[1] - data2Re[1]);
+ data2Re[1] = tem;
+ // Element 2: (re, im) -> (im, -re), i.e. multiplication by -i.
+ tem = data2Im[2];
+ data2Im[2] = -data2Re[2];
+ data2Re[2] = tem;
+ // Element 3: (re, im) -> OnetoSqrt2 * (im - re, -(re + im)).
+ tem = OnetoSqrt2 * (data2Im[3] - data2Re[3]);
+ data2Im[3] = -OnetoSqrt2 * (data2Re[3] + data2Im[3]);
+ data2Re[3] = tem;
+
+ // Combine: output k is data1[k] + data2[k], output k + 4 is
+ // data1[k] - data2[k], for k = 0..3.
+ temRe[0] = data1Re[0] + data2Re[0];
+ temRe[4] = data1Re[0] - data2Re[0];
+ temRe[1] = data1Re[1] + data2Re[1];
+ temRe[5] = data1Re[1] - data2Re[1];
+ temRe[2] = data1Re[2] + data2Re[2];
+ temRe[6] = data1Re[2] - data2Re[2];
+ temRe[3] = data1Re[3] + data2Re[3];
+ temRe[7] = data1Re[3] - data2Re[3];
+
+ temIm[0] = data1Im[0] + data2Im[0];
+ temIm[4] = data1Im[0] - data2Im[0];
+ temIm[1] = data1Im[1] + data2Im[1];
+ temIm[5] = data1Im[1] - data2Im[1];
+ temIm[2] = data1Im[2] + data2Im[2];
+ temIm[6] = data1Im[2] - data2Im[2];
+ temIm[3] = data1Im[3] + data2Im[3];
+ temIm[7] = data1Im[3] - data2Im[3];
+ } // End of function fft8().
+
+ // Length-10 transform, computed in place on temRe[0..9] / temIm[0..9]
+ // (scratch arrays declared outside this method). The input is permuted
+ // into two length-5 sequences, fft5() is run on each, and the results are
+ // combined with sums and differences only; no rotation is applied between
+ // the two stages.
+ // NOTE(review): the index pattern looks like a prime-factor (2 x 5) index
+ // mapping - confirm against the algorithm reference.
+ private void fft10() {
+ double data1Re[] = new double[5];
+ double data1Im[] = new double[5];
+ double data2Re[] = new double[5];
+ double data2Im[] = new double[5];
+
+ // To improve the speed, use direct assignment instead of a for loop here.
+ // data1[k] takes input index 2k (0,2,4,6,8); data2[k] takes input index
+ // (5 + 2k) mod 10 (5,7,9,1,3).
+ data1Re[0] = temRe[0];
+ data2Re[0] = temRe[5];
+ data1Re[1] = temRe[2];
+ data2Re[1] = temRe[7];
+ data1Re[2] = temRe[4];
+ data2Re[2] = temRe[9];
+ data1Re[3] = temRe[6];
+ data2Re[3] = temRe[1];
+ data1Re[4] = temRe[8];
+ data2Re[4] = temRe[3];
+ data1Im[0] = temIm[0];
+ data2Im[0] = temIm[5];
+ data1Im[1] = temIm[2];
+ data2Im[1] = temIm[7];
+ data1Im[2] = temIm[4];
+ data2Im[2] = temIm[9];
+ data1Im[3] = temIm[6];
+ data2Im[3] = temIm[1];
+ data1Im[4] = temIm[8];
+ data2Im[4] = temIm[3];
+
+ fft5(data1Re, data1Im);
+ fft5(data2Re, data2Im);
+
+ // Combine: for k = 0..4 the sum data1[k] + data2[k] is stored at index
+ // 6k mod 10 (0,6,2,8,4) and the difference data1[k] - data2[k] at index
+ // (6k + 5) mod 10 (5,1,7,3,9).
+ temRe[0] = data1Re[0] + data2Re[0];
+ temRe[5] = data1Re[0] - data2Re[0];
+ temRe[6] = data1Re[1] + data2Re[1];
+ temRe[1] = data1Re[1] - data2Re[1];
+ temRe[2] = data1Re[2] + data2Re[2];
+ temRe[7] = data1Re[2] - data2Re[2];
+ temRe[8] = data1Re[3] + data2Re[3];
+ temRe[3] = data1Re[3] - data2Re[3];
+ temRe[4] = data1Re[4] + data2Re[4];
+ temRe[9] = data1Re[4] - data2Re[4];
+
+ temIm[0] = data1Im[0] + data2Im[0];
+ temIm[5] = data1Im[0] - data2Im[0];
+ temIm[6] = data1Im[1] + data2Im[1];
+ temIm[1] = data1Im[1] - data2Im[1];
+ temIm[2] = data1Im[2] + data2Im[2];
+ temIm[7] = data1Im[2] - data2Im[2];
+ temIm[8] = data1Im[3] + data2Im[3];
+ temIm[3] = data1Im[3] - data2Im[3];
+ temIm[4] = data1Im[4] + data2Im[4];
+ temIm[9] = data1Im[4] - data2Im[4];
+ } // End of function fft10().
+
+  // Public convenience wrapper: returns Math.sqrt(d) unchanged.
+  public double sqrt(double d) {
+    double root = Math.sqrt(d);
+    return root;
+  }
+
+ // Direct DFT of length 'radix', computed in place on temRe[0..radix-1] /
+ // temIm[0..radix-1] (scratch arrays declared outside this method). This is
+ // the fallback for radices that have no dedicated butterfly routine.
+ // The work is halved by processing inputs and outputs in mirrored pairs
+ // (j, radix - j).
+ // NOTE(review): the pairing reaches every non-zero index only when radix is
+ // odd - assumes callers never pass an even radix here; confirm.
+ private void fftPrime(int radix) {
+ // Initial WRe, WIm.
+ double W = 2 * (double) Math.PI / radix;
+ double cosW = (double) Math.cos(W);
+ double sinW = -(double) Math.sin(W);
+ double WRe[] = new double[radix];
+ double WIm[] = new double[radix];
+
+ WRe[0] = 1;
+ WIm[0] = 0;
+ WRe[1] = cosW;
+ WIm[1] = sinW;
+
+ // Each entry is the previous one multiplied (as a complex number) by
+ // (cosW, sinW), so entry i is the i-th power of (cos W, -sin W).
+ for (int i = 2; i < radix; i++) {
+ WRe[i] = cosW * WRe[i - 1] - sinW * WIm[i - 1];
+ WIm[i] = sinW * WRe[i - 1] + cosW * WIm[i - 1];
+ }
+
+ // FFT of prime length data, using DFT, can be improved in the future.
+ double rere, reim, imre, imim;
+ int j, k;
+ // Number of mirrored pairs plus one; index 0 of the tem1/tem2 arrays is
+ // never used.
+ int max = (radix + 1) / 2;
+
+ double tem1Re[] = new double[max];
+ double tem1Im[] = new double[max];
+ double tem2Re[] = new double[max];
+ double tem2Im[] = new double[max];
+
+ // Sums and differences of each mirrored input pair, taken before temRe /
+ // temIm are overwritten below. Note the mixed signs: tem1 is (re sum,
+ // im difference) and tem2 is (re difference, im sum).
+ for (j = 1; j < max; j++) {
+ tem1Re[j] = temRe[j] + temRe[radix - j];
+ tem1Im[j] = temIm[j] - temIm[radix - j];
+ tem2Re[j] = temRe[j] - temRe[radix - j];
+ tem2Im[j] = temIm[j] + temIm[radix - j];
+ }
+
+ // Outputs j and radix - j are accumulated together. Both start from
+ // input 0, which is still unmodified at this point.
+ for (j = 1; j < max; j++) {
+ temRe[j] = temRe[0];
+ temIm[j] = temIm[0];
+ temRe[radix - j] = temRe[0];
+ temIm[radix - j] = temIm[0];
+ // k follows (i * j) mod radix without a multiplication or a modulo.
+ k = j;
+ for (int i = 1; i < max; i++) {
+ rere = WRe[k] * tem1Re[i];
+ imim = WIm[k] * tem1Im[i];
+ reim = WRe[k] * tem2Im[i];
+ imre = WIm[k] * tem2Re[i];
+
+ temRe[radix - j] += rere + imim;
+ temIm[radix - j] += reim - imre;
+ temRe[j] += rere - imim;
+ temIm[j] += reim + imre;
+
+ k = k + j;
+ if (k >= radix)
+ k = k - radix;
+ }
+ }
+ // Output 0 is done last because the loop above reads the original
+ // input 0; it adds the pair sums of the real and imaginary parts.
+ for (j = 1; j < max; j++) {
+ temRe[0] = temRe[0] + tem1Re[j];
+ temIm[0] = temIm[0] + tem2Im[j];
+ }
+ } // End of function fftPrime().
+
+} // End of class FFT1d
--- /dev/null
+//Title: 2-d mixed radix FFT.
+//Version:
+//Copyright: Copyright (c) 1998
+//Author: Dongyan Wang
+//Company: University of Wisconsin-Milwaukee.
+//Description:
+// . Use FFT1d to perform FFT2d.
+//
+
+public class FFT2d {
+  // Working copy of the input as a height x width matrix (real and
+  // imaginary parts). After construction it holds the row-transformed data;
+  // the final result is written back to the caller's arrays instead.
+  // Both stay null if the constructor rejects its input.
+  double dataRe[][], dataIm[][];
+
+  // Width and height of the 2-d matrix held in dataRe / dataIm.
+  int width, height;
+
+  // Constructor: 2-d FFT of complex data. The whole transform runs here.
+  //
+  //   inputRe, inputIm - real and imaginary parts of the matrix, flattened
+  //                      row by row; overwritten with the result.
+  //   inputWidth       - number of elements in each row.
+  //
+  // The number of rows is inputRe.length / inputWidth (integer division), so
+  // trailing elements that do not fill a complete row are left untouched.
+  // On invalid input an error is printed and the arrays are not modified.
+  // FFT1d.fft() is assumed to transform its two arguments in place, since
+  // the results are read back from the same arrays.
+  public FFT2d(double inputRe[], double inputIm[], int inputWidth) {
+    // First make sure inputRe & inputIm are of the same length.
+    if (inputRe.length != inputIm.length) {
+      System.out.println("Error: the length of real part & imaginary part " +
+                         "of the input to 2-d FFT are different");
+      return;
+    }
+    // A width of zero would divide by zero below and a negative width would
+    // give a negative array size, so reject both in the same way as above.
+    if (inputWidth <= 0) {
+      System.out.println("Error: the width of the input to 2-d FFT " +
+                         "must be positive");
+      return;
+    }
+
+    width = inputWidth;
+    height = inputRe.length / width;
+    dataRe = new double[height][width];
+    dataIm = new double[height][width];
+
+    // Unflatten the input into rows.
+    for (int i = 0; i < height; i++)
+      for (int j = 0; j < width; j++) {
+        dataRe[i][j] = inputRe[i * width + j];
+        dataIm[i][j] = inputIm[i * width + j];
+      }
+
+    // Calculate FFT for each row of the data.
+    FFT1d fft1 = new FFT1d(width);
+    for (int i = 0; i < height; i++)
+      fft1.fft(dataRe[i], dataIm[i]);
+
+    // Transpose the data so that every column becomes one array, then
+    // calculate FFT for each column of the data.
+    double temRe[][] = transpose(dataRe);
+    double temIm[][] = transpose(dataIm);
+
+    FFT1d fft2 = new FFT1d(height);
+    for (int j = 0; j < width; j++)
+      fft2.fft(temRe[j], temIm[j]);
+
+    // Transpose back while copying the result to input[], so the output is
+    // returned in the input arrays.
+    for (int i = 0; i < height; i++)
+      for (int j = 0; j < width; j++) {
+        inputRe[i * width + j] = temRe[j][i];
+        inputIm[i * width + j] = temIm[j][i];
+      }
+  }
+
+  // Returns a new width x height matrix that is the transpose of the
+  // height x width matrix 'input'.
+  private double[][] transpose(double[][] input) {
+    double[][] output = new double[width][height];
+
+    for (int j = 0; j < width; j++)
+      for (int i = 0; i < height; i++)
+        output[j][i] = input[i][j];
+
+    return output;
+  } // End of function transpose().
+
+
+  // Benchmark driver.
+  //   args[0] (optional) - thread count; it is only echoed in the banner.
+  //   args[1] (optional) - total number of complex input elements (default 800).
+  // The data is treated as a matrix with 10 elements per row.
+  public static void main(String[] args) {
+    int NUM_THREADS = 1;
+    int SIZE = 800;
+    int inputWidth = 10;
+    if (args.length > 0) {
+      NUM_THREADS = Integer.parseInt(args[0]);
+      if (args.length > 1)
+        SIZE = Integer.parseInt(args[1]);
+    }
+
+    System.out.println("Num threads = " + NUM_THREADS + " SIZE= " + SIZE + "\n");
+
+    // Test input: element i has real part i and imaginary part i.
+    double[] inputRe = new double[SIZE];
+    double[] inputIm = new double[SIZE];
+    for (int i = 0; i < SIZE; i++) {
+      inputRe[i] = i;
+      inputIm[i] = i;
+    }
+
+    // The constructor performs the whole transform on the calling thread.
+    FFT2d myfft2d = new FFT2d(inputRe, inputIm, inputWidth);
+
+    System.out.println("2DFFT done! \n");
+  }
+}
--- /dev/null
+MAINCLASS=FFT2d
+SRC=${MAINCLASS}.java FFT1d.java
+
+# Neither target names a file that its recipe creates, so declare both phony;
+# otherwise a stray file called "default" or "clean" would stop the recipe
+# from running.
+.PHONY: default clean
+
+# Build the benchmark with the project build script.
+default:
+	../../../../buildscript -optimize -thread -debug -mainclass ${MAINCLASS} ${SRC} -o ${MAINCLASS}
+
+# Remove build products. -f keeps "make clean" from failing when there are no
+# *.bin files to delete (fresh checkout, or a second clean in a row).
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
FLAGS3=-dsm -optimize -mainclass ${MAINCLASS} -trueprob 0.85
default:
-# ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}NP ${SRC}
-# ../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
../../../../buildscript ${FLAGS3} -o ${MAINCLASS}NPNC ${SRC3}
-# ../../../../buildscript ${FLAGS2} -o ${MAINCLASS}NP ${SRC3}
../../../../buildscript ${FLAGS1} -o ${MAINCLASS}N ${SRC3}
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}1NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}1NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}1.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}2NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}2NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}2.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}3NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}3NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}3.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}4NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}4NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}4.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}5NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}5NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}5.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}6NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}6NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}6.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}7NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}7NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}7.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}8NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}8NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}8.bin
clean:
rm -rf tmpbuilddirectory
+++ /dev/null
-public class MatrixMultiply extends Thread{
- MMul mmul;
- public int x0, y0, x1, y1;
-
- public MatrixMultiply(MMul mmul, int x0, int y0, int x1, int y1) {
- this.mmul = mmul;
- this.x0 = x0;
- this.y0 = y0;
- this.x1 = x1;
- this.y1 = y1;
- }
-
- public void run() {
- double localresults[][];
-
- atomic {
- //compute the results
- localresults=new double[1+x1-x0][1+y1-y0];
- double la[][]=mmul.a;
- double lbtranspose[][]=mmul.b;
- double lc[][]=mmul.c;
- int M=mmul.M;
-
- //Use b transpose for cache performance
- for(int i = x0; i<= x1; i++){
- double a[]=la[i];
- for (int j = y0; j <= y1; j++) {
- double innerProduct=0;
- double b[] = lbtranspose[j];
- for(int k = 0; k < M; k++) {
- innerProduct += a[k] *b[k];
- }
- localresults[i-x0][j-y0]=innerProduct;
- }
- }
- }
-
- atomic {
- //write the results
- for(int i=x0;i<=x1;i++) {
- double c[]=mmul.c[i];
- for(int j=y0;j<=y1;j++) {
- c[j]=localresults[i-x0][j-y0];
- }
- }
- }
- }
-
- public static void main(String[] args) {
- int NUM_THREADS = 4;
- int[] mid = new int[NUM_THREADS];
- mid[0] = (128<<24)|(195<<16)|(175<<8)|69;
- mid[1] = (128<<24)|(195<<16)|(175<<8)|73;
- mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
- mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
- int p, q, r;
- MatrixMultiply[] mm;
- MatrixMultiply tmp;
- MMul matrix;
-
- atomic {
- matrix = global new MMul(400, 400, 400);
- matrix.setValues();
- matrix.transpose();
- }
-
- atomic{
- mm = global new MatrixMultiply[NUM_THREADS];
- }
-
- atomic {
- mm[0] = global new MatrixMultiply(matrix,0,0,200,200);
- mm[1] = global new MatrixMultiply(matrix,0,201,200,399);
- mm[2] = global new MatrixMultiply(matrix,201,0,399,200);
- mm[3] = global new MatrixMultiply(matrix,201,201,399,399);
- }
-
- atomic {
- p = matrix.L;
- q = matrix.M;
- r = matrix.N;
- }
-
- // print out the matrices to be multiplied
- System.printString("\n");
- System.printString("MatrixMultiply: L=");
- System.printInt(p);
- System.printString("\t");
- System.printString("M=");
- System.printInt(q);
- System.printString("\t");
- System.printString("N=");
- System.printInt(r);
- System.printString("\n");
-
- // start a thread to compute each c[l,n]
- for (int i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = mm[i];
- }
- tmp.start(mid[i]);
- }
-
- // wait for them to finish
- for (int i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = mm[i];
- }
- tmp.join();
- }
-
- // print out the result of the matrix multiply
- System.printString("Starting\n");
- System.printString("Matrix Product c =\n");
- double val;
- atomic {
- for (int i = 0; i < p; i++) {
- double c[]=matrix.c[i];
- for (int j = 0; j < r; j++) {
- val = c[j];
- }
- }
- }
- System.printString("Finished\n");
- }
-}
-
-public class MMul{
-
- public int L, M, N;
- public double[][] a;
- public double[][] b;
- public double[][] c;
- public double[][] btranspose;
-
- public MMul(int L, int M, int N) {
- this.L = L;
- this.M = M;
- this.N = N;
- a = global new double[L][M];
- b = global new double[M][N];
- c = global new double[L][N];
- btranspose = global new double[N][M];
- }
-
- public void setValues() {
- for(int i = 0; i < L; i++) {
- double ai[] = a[i];
- for(int j = 0; j < M; j++) {
- ai[j] = j+1;
- }
- }
-
- for(int i = 0; i < M; i++) {
- double bi[] = b[i];
- for(int j = 0; j < N; j++) {
- bi[j] = j+1;
- }
- }
-
- for(int i = 0; i < L; i++) {
- double ci[] = c[i];
- for(int j = 0; j < N; j++) {
- ci[j] = 0;
- }
- }
- for(int i = 0; i < N; i++) {
- double btransposei[] = btranspose[i];
- for(int j = 0; j < M; j++) {
- btransposei[j] = 0;
- }
- }
- }
-
- public void transpose() {
- for(int row = 0; row < M; row++) {
- double brow[] = b[row];
- for(int col = 0; col < N; col++) {
- btranspose[col][row] = brow[col];
- }
- }
- }
-}
+++ /dev/null
-public class MatrixMultiply extends Thread{
- MMul mmul;
- public int x0, y0, x1, y1;
- public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) {
- this.mmul = mmul;
- this.x0 = x0;
- this.y0 = y0;
- this.x1 = x1;
- this.y1 = y1;
- }
-
- public void run() {
- atomic {
- double la[][]=mmul.a;
- double lc[][]=mmul.c;
- double lb[][]=mmul.btranspose;
- int M=mmul.M;
-
- //Use btranspose for cache performance
- for(int i = x0; i< x1; i++){
- double a[]=la[i];
- double c[]=lc[i];
- for (int j = y0; j < y1; j++) {
- double innerProduct=0;
- double b[] = lb[j];
- for(int k = 0; k < M; k++) {
- innerProduct += a[k] *b[k];
- }
- c[j]=innerProduct;
- }
- }
- }
- }
-
- public static void main(String[] args) {
- int NUM_THREADS = 4;
- int SIZE=600;
- if (args.length>0) {
- NUM_THREADS=Integer.parseInt(args[0]);
- if (args.length>1)
- SIZE=Integer.parseInt(args[1]);
- }
-
- int[] mid = new int[8];
- mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
- mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dw-11
- mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dw-12
- mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dw-13
- mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dw-14
- mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dw-15
- mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dw-16
- mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dw-17
-
- int p, q, r;
- MatrixMultiply[] mm;
- MatrixMultiply tmp;
- MMul matrix;
-
- atomic {
- matrix = global new MMul(SIZE, SIZE, SIZE);
- matrix.setValues();
- matrix.transpose();
- mm = global new MatrixMultiply[NUM_THREADS];
- int increment=SIZE/NUM_THREADS;
- int base=0;
- for(int i=0;i<NUM_THREADS;i++) {
- if ((i+1)==NUM_THREADS)
- mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
- else
- mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
- base+=increment;
- }
- p = matrix.L;
- q = matrix.M;
- r = matrix.N;
- }
-
- // print out the matrices to be multiplied
- System.printString("\n");
- System.printString("MatrixMultiply: L=");
- System.printInt(p);
- System.printString("\t");
- System.printString("M=");
- System.printInt(q);
- System.printString("\t");
- System.printString("N=");
- System.printInt(r);
- System.printString("\n");
-
- // start a thread to compute each c[l,n]
- for (int i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = mm[i];
- }
- tmp.start(mid[i]);
- }
-
-
- // wait for them to finish
- for (int i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = mm[i];
- }
- tmp.join();
- }
-
- // print out the result of the matrix multiply
-
- System.printString("Finished\n");
- }
-}
-
-public class MMul{
-
- public int L, M, N;
- public double[][] a;
- public double[][] b;
- public double[][] c;
- public double[][] btranspose;
-
- public MMul(int L, int M, int N) {
- this.L = L;
- this.M = M;
- this.N = N;
- a = global new double[L][M];
- b = global new double[M][N];
- c = global new double[L][N];
- btranspose = global new double[N][M];
- }
-
- public void setValues() {
- for(int i = 0; i < L; i++) {
- double ai[] = a[i];
- for(int j = 0; j < M; j++) {
- ai[j] = j+1;
- }
- }
-
- for(int i = 0; i < M; i++) {
- double bi[] = b[i];
- for(int j = 0; j < N; j++) {
- bi[j] = j+1;
- }
- }
-
- for(int i = 0; i < L; i++) {
- double ci[] = c[i];
- for(int j = 0; j < N; j++) {
- ci[j] = 0;
- }
- }
- for(int i = 0; i < N; i++) {
- double btransposei[] = btranspose[i];
- for(int j = 0; j < M; j++) {
- btransposei[j] = 0;
- }
- }
- }
-
- public void transpose() {
- for(int row = 0; row < M; row++) {
- double brow[] = b[row];
- for(int col = 0; col < N; col++) {
- btranspose[col][row] = brow[col];
- }
- }
- }
-}
+++ /dev/null
-public class MatrixMultiply extends Thread{
- MMul mmul;
- public int x0, y0, x1, y1;
-
- public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) {
- this.mmul = mmul;
- this.x0 = x0;
- this.y0 = y0;
- this.x1 = x1;
- this.y1 = y1;
- }
-
- public void run() {
- atomic {
- double la[][][]=mmul.a;
- double lc[][][]=mmul.c;
- double lb[][][]=mmul.btranspose;
- int M=mmul.M;
- int P=mmul.P;
- //Use btranspose for cache performance
- for(int q=0;q<P;q++) {
- double ra[][]=la[q];
- double rb[][]=lb[q];
- double rc[][]=lc[q];
- for(int i = x0; i< x1; i++){
- double a[]=ra[i];
- double c[]=rc[i];
- for (int j = y0; j < y1; j++) {
- double innerProduct=0;
- double b[] = rb[j];
- for(int k = 0; k < M; k++) {
- innerProduct += a[k] *b[k];
- }
- c[j]=innerProduct;
- }
- }
- }
- }
- }
-
- public static void main(String[] args) {
- int NUM_THREADS = 4;
- int SIZE=150;
- int NUM_MATRIX = 1;
- if (args.length>0) {
- NUM_THREADS=Integer.parseInt(args[0]);
- if (args.length>1) {
- SIZE=Integer.parseInt(args[1]);
- if (args.length>2)
- NUM_MATRIX=Integer.parseInt(args[2]);
- }
- }
-
- int[] mid = new int[8];
- int[] mid = new int[8];
- mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
- mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dw-11
- mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dw-12
- mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dw-13
- mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dw-14
- mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dw-15
- mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dw-16
- mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dw-17
-
- int p, q, r;
- MatrixMultiply[] mm;
- MatrixMultiply tmp;
- MMul matrix;
-
- atomic {
- matrix = global new MMul(NUM_MATRIX, SIZE, SIZE, SIZE);
- matrix.setValues();
- matrix.transpose();
- mm = global new MatrixMultiply[NUM_THREADS];
- int increment=SIZE/NUM_THREADS;
- int base=0;
- for(int i=0;i<NUM_THREADS;i++) {
- if ((i+1)==NUM_THREADS)
- mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
- else
- mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
- base+=increment;
- }
- p = matrix.L;
- q = matrix.M;
- r = matrix.N;
- }
-
- // start a thread to compute each c[l,n]
- for (int i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = mm[i];
- }
- tmp.start(mid[i]);
- }
-
- // wait for them to finish
- for (int i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = mm[i];
- }
- tmp.join();
- }
-
- // print out the result of the matrix multiply
- System.printString("Finished\n");
- }
-}
-
-public class MMul{
-
- public int L, M, N, P;
- public double[][][] a;
- public double[][][] b;
- public double[][][] c;
- public double[][][] btranspose;
-
- public MMul(int P, int L, int M, int N) {
- this.L = L;
- this.M = M;
- this.N = N;
- this.P = P;
- a = global new double[P][L][M];
- b = global new double[P][M][N];
- c = global new double[P][L][N];
- btranspose = global new double[P][N][M];
- }
-
- public void setValues() {
- for(int q = 0; q < P; q++) {
- for(int i = 0; i < L; i++) {
- double ai[] = a[q][i];
- for(int j = 0; j < M; j++) {
- ai[j] = j+1;
- }
- }
-
- for(int i = 0; i < M; i++) {
- double bi[] = b[q][i];
- for(int j = 0; j < N; j++) {
- bi[j] = j+1;
- }
- }
- }
- }
-
- public void transpose() {
- for(int q=0;q<P;q++) {
- double br[][]=b[q];
- double bt[][]=btranspose[q];
- for(int row = 0; row < M; row++) {
- double brow[] = br[row];
- for(int col = 0; col < N; col++) {
- bt[col][row] = brow[col];
- }
- }
- }
- }
-}
--- /dev/null
+public class MatrixMultiply extends Thread{
+  // Shared operand and result matrices.
+  MMul mmul;
+  // Inclusive bounds of the block of c computed by this worker:
+  // rows x0..x1 and columns y0..y1.
+  public int x0, y0, x1, y1;
+
+  // Creates a worker for rows x0..x1 and columns y0..y1 (both inclusive) of
+  // the product stored in mmul.
+  public MatrixMultiply(MMul mmul, int x0, int y0, int x1, int y1) {
+    this.mmul = mmul;
+    this.x0 = x0;
+    this.y0 = y0;
+    this.x1 = x1;
+    this.y1 = y1;
+  }
+
+  // Computes this worker's block of c = a * b into a local array in one
+  // atomic section, then copies it into mmul.c in a second one.
+  public void run() {
+    double localresults[][];
+
+    atomic {
+      //compute the results
+      localresults=new double[1+x1-x0][1+y1-y0];
+      double la[][]=mmul.a;
+      // Row j of btranspose is column j of b (filled in by MMul.transpose()),
+      // so the dot product below reads two rows instead of striding down a
+      // column of b. Reading mmul.b here would compute a * b^T instead.
+      double lbtranspose[][]=mmul.btranspose;
+      int M=mmul.M;
+
+      //Use b transpose for cache performance
+      for(int i = x0; i<= x1; i++){
+        double a[]=la[i];
+        for (int j = y0; j <= y1; j++) {
+          double innerProduct=0;
+          double b[] = lbtranspose[j];
+          for(int k = 0; k < M; k++) {
+            innerProduct += a[k] *b[k];
+          }
+          localresults[i-x0][j-y0]=innerProduct;
+        }
+      }
+    }
+
+    atomic {
+      //write the results
+      for(int i=x0;i<=x1;i++) {
+        double c[]=mmul.c[i];
+        for(int j=y0;j<=y1;j++) {
+          c[j]=localresults[i-x0][j-y0];
+        }
+      }
+    }
+  }
+
+  // Benchmark driver: multiplies two 400 x 400 matrices using four workers,
+  // one per quadrant of the result.
+  public static void main(String[] args) {
+    int NUM_THREADS = 4;
+    // One value per worker, handed to start(). Each packs four byte values
+    // as a.b.c.d (here 128.195.175.x) - presumably the address of the
+    // machine that runs the worker; confirm against the runtime.
+    int[] mid = new int[NUM_THREADS];
+    mid[0] = (128<<24)|(195<<16)|(175<<8)|69;
+    mid[1] = (128<<24)|(195<<16)|(175<<8)|73;
+    mid[2] = (128<<24)|(195<<16)|(175<<8)|78;
+    mid[3] = (128<<24)|(195<<16)|(175<<8)|79;
+    int p, q, r;
+    MatrixMultiply[] mm;
+    MatrixMultiply tmp;
+    MMul matrix;
+
+    atomic {
+      matrix = global new MMul(400, 400, 400);
+      matrix.setValues();
+      matrix.transpose();
+    }
+
+    atomic{
+      mm = global new MatrixMultiply[NUM_THREADS];
+    }
+
+    // Quadrants of the 400 x 400 result, given as inclusive
+    // (x0, y0, x1, y1) bounds.
+    atomic {
+      mm[0] = global new MatrixMultiply(matrix,0,0,200,200);
+      mm[1] = global new MatrixMultiply(matrix,0,201,200,399);
+      mm[2] = global new MatrixMultiply(matrix,201,0,399,200);
+      mm[3] = global new MatrixMultiply(matrix,201,201,399,399);
+    }
+
+    atomic {
+      p = matrix.L;
+      q = matrix.M;
+      r = matrix.N;
+    }
+
+    // print out the dimensions of the matrices to be multiplied
+    System.printString("\n");
+    System.printString("MatrixMultiply: L=");
+    System.printInt(p);
+    System.printString("\t");
+    System.printString("M=");
+    System.printInt(q);
+    System.printString("\t");
+    System.printString("N=");
+    System.printInt(r);
+    System.printString("\n");
+
+    // start one worker per block of c
+    for (int i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = mm[i];
+      }
+      tmp.start(mid[i]);
+    }
+
+    // wait for them to finish
+    for (int i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = mm[i];
+      }
+      tmp.join();
+    }
+
+    // Read back every element of the product. The values are not printed;
+    // only the two banner lines and "Finished" are.
+    System.printString("Starting\n");
+    System.printString("Matrix Product c =\n");
+    double val;
+    atomic {
+      for (int i = 0; i < p; i++) {
+        double c[]=matrix.c[i];
+        for (int j = 0; j < r; j++) {
+          val = c[j];
+        }
+      }
+    }
+    System.printString("Finished\n");
+  }
+}
+
+public class MMul{
+
+  // Matrix dimensions: a is L x M, b is M x N, c is L x N and
+  // btranspose is N x M.
+  public int L, M, N;
+  public double[][] a;
+  public double[][] b;
+  public double[][] c;
+  public double[][] btranspose;
+
+  // Records the dimensions and allocates the four matrices with "global new".
+  public MMul(int L, int M, int N) {
+    this.L = L;
+    this.M = M;
+    this.N = N;
+    a = global new double[L][M];
+    b = global new double[M][N];
+    c = global new double[L][N];
+    btranspose = global new double[N][M];
+  }
+
+  // Fills every row of a and of b with 1, 2, 3, ... and sets every element
+  // of c and of btranspose to zero.
+  public void setValues() {
+    // a and c both have L rows, so they are handled in one pass.
+    for(int row = 0; row < L; row++) {
+      double aRow[] = a[row];
+      for(int col = 0; col < M; col++) {
+        aRow[col] = col+1;
+      }
+      double cRow[] = c[row];
+      for(int col = 0; col < N; col++) {
+        cRow[col] = 0;
+      }
+    }
+
+    for(int row = 0; row < M; row++) {
+      double bRow[] = b[row];
+      for(int col = 0; col < N; col++) {
+        bRow[col] = col+1;
+      }
+    }
+
+    for(int row = 0; row < N; row++) {
+      double tRow[] = btranspose[row];
+      for(int col = 0; col < M; col++) {
+        tRow[col] = 0;
+      }
+    }
+  }
+
+  // Stores the transpose of b in btranspose: btranspose[col][row] = b[row][col].
+  public void transpose() {
+    for(int col = 0; col < N; col++) {
+      double tRow[] = btranspose[col];
+      for(int row = 0; row < M; row++) {
+        tRow[row] = b[row][col];
+      }
+    }
+  }
+}
--- /dev/null
+public class MatrixMultiply extends Thread{
+  // Shared operand and result matrices.
+  MMul mmul;
+  // Half-open bounds of the block of c computed by this worker:
+  // rows x0 <= i < x1 and columns y0 <= j < y1.
+  public int x0, y0, x1, y1;
+
+  // Creates a worker for rows [x0, x1) and columns [y0, y1) of the product
+  // stored in mmul. Note the argument order: both row bounds come first.
+  public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) {
+    this.mmul = mmul;
+    this.x0 = x0;
+    this.y0 = y0;
+    this.x1 = x1;
+    this.y1 = y1;
+  }
+
+  // Computes this worker's block of c = a * b and stores it directly into
+  // mmul.c, all inside a single atomic section.
+  public void run() {
+    atomic {
+      double la[][]=mmul.a;
+      double lc[][]=mmul.c;
+      double lb[][]=mmul.btranspose;
+      int M=mmul.M;
+
+      //Use btranspose for cache performance
+      for(int i = x0; i< x1; i++){
+        double a[]=la[i];
+        double c[]=lc[i];
+        for (int j = y0; j < y1; j++) {
+          double innerProduct=0;
+          double b[] = lb[j];
+          for(int k = 0; k < M; k++) {
+            innerProduct += a[k] *b[k];
+          }
+          c[j]=innerProduct;
+        }
+      }
+    }
+  }
+
+  // Benchmark driver.
+  //   args[0] (optional) - number of workers, 1..8 (default 4).
+  //   args[1] (optional) - matrix dimension SIZE (default 600).
+  // The rows of the SIZE x SIZE result are split into one strip per worker.
+  public static void main(String[] args) {
+    int NUM_THREADS = 4;
+    int SIZE=600;
+    if (args.length>0) {
+      NUM_THREADS=Integer.parseInt(args[0]);
+      if (args.length>1)
+        SIZE=Integer.parseInt(args[1]);
+    }
+
+    // One value per worker, handed to start(). Each packs four byte values
+    // as a.b.c.d (here 128.195.136.x) - presumably the address of the
+    // machine that runs the worker; confirm against the runtime.
+    int[] mid = new int[8];
+    mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
+    mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dw-11
+    mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dw-12
+    mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dw-13
+    mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dw-14
+    mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dw-15
+    mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dw-16
+    mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dw-17
+
+    // Every worker needs its own entry in mid, and the strip height below
+    // divides by the worker count, so reject counts outside 1..mid.length
+    // before any work is done.
+    if (NUM_THREADS < 1 || NUM_THREADS > mid.length) {
+      System.printString("Error: number of threads must be between 1 and 8\n");
+      return;
+    }
+
+    int p, q, r;
+    MatrixMultiply[] mm;
+    MatrixMultiply tmp;
+    MMul matrix;
+
+    atomic {
+      matrix = global new MMul(SIZE, SIZE, SIZE);
+      matrix.setValues();
+      matrix.transpose();
+      mm = global new MatrixMultiply[NUM_THREADS];
+      // Each worker gets 'increment' rows; the last one also takes whatever
+      // remains when SIZE is not a multiple of NUM_THREADS.
+      int increment=SIZE/NUM_THREADS;
+      int base=0;
+      for(int i=0;i<NUM_THREADS;i++) {
+        if ((i+1)==NUM_THREADS)
+          mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
+        else
+          mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
+        base+=increment;
+      }
+      p = matrix.L;
+      q = matrix.M;
+      r = matrix.N;
+    }
+
+    // print out the dimensions of the matrices to be multiplied
+    System.printString("\n");
+    System.printString("MatrixMultiply: L=");
+    System.printInt(p);
+    System.printString("\t");
+    System.printString("M=");
+    System.printInt(q);
+    System.printString("\t");
+    System.printString("N=");
+    System.printInt(r);
+    System.printString("\n");
+
+    // start one worker per strip of c
+    for (int i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = mm[i];
+      }
+      tmp.start(mid[i]);
+    }
+
+
+    // wait for them to finish
+    for (int i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = mm[i];
+      }
+      tmp.join();
+    }
+
+    // The product itself is not printed.
+
+    System.printString("Finished\n");
+  }
+}
+
+public class MMul{
+
+  // Matrix dimensions: a is L x M, b is M x N, c is L x N and
+  // btranspose is N x M.
+  public int L, M, N;
+  public double[][] a;
+  public double[][] b;
+  public double[][] c;
+  public double[][] btranspose;
+
+  // Records the dimensions and allocates the four matrices with "global new".
+  public MMul(int L, int M, int N) {
+    this.L = L;
+    this.M = M;
+    this.N = N;
+    a = global new double[L][M];
+    b = global new double[M][N];
+    c = global new double[L][N];
+    btranspose = global new double[N][M];
+  }
+
+  // Fills every row of a and of b with 1, 2, 3, ... and sets every element
+  // of c and of btranspose to zero.
+  public void setValues() {
+    // a and c both have L rows, so they are handled in one pass.
+    for(int row = 0; row < L; row++) {
+      double aRow[] = a[row];
+      for(int col = 0; col < M; col++) {
+        aRow[col] = col+1;
+      }
+      double cRow[] = c[row];
+      for(int col = 0; col < N; col++) {
+        cRow[col] = 0;
+      }
+    }
+
+    for(int row = 0; row < M; row++) {
+      double bRow[] = b[row];
+      for(int col = 0; col < N; col++) {
+        bRow[col] = col+1;
+      }
+    }
+
+    for(int row = 0; row < N; row++) {
+      double tRow[] = btranspose[row];
+      for(int col = 0; col < M; col++) {
+        tRow[col] = 0;
+      }
+    }
+  }
+
+  // Stores the transpose of b in btranspose: btranspose[col][row] = b[row][col].
+  public void transpose() {
+    for(int col = 0; col < N; col++) {
+      double tRow[] = btranspose[col];
+      for(int row = 0; row < M; row++) {
+        tRow[row] = b[row][col];
+      }
+    }
+  }
+}
--- /dev/null
+public class MatrixMultiply extends Thread{
+  // Shared operand and result matrices (a batch of P independent products).
+  MMul mmul;
+  // Half-open bounds of the block computed by this worker in every product:
+  // rows x0 <= i < x1 and columns y0 <= j < y1.
+  public int x0, y0, x1, y1;
+
+  // Creates a worker for rows [x0, x1) and columns [y0, y1) of each product
+  // stored in mmul. Note the argument order: both row bounds come first.
+  public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) {
+    this.mmul = mmul;
+    this.x0 = x0;
+    this.y0 = y0;
+    this.x1 = x1;
+    this.y1 = y1;
+  }
+
+  // For each of the P matrix triples, computes this worker's block of
+  // c[q] = a[q] * b[q] and stores it directly into mmul.c, all inside a
+  // single atomic section.
+  public void run() {
+    atomic {
+      double la[][][]=mmul.a;
+      double lc[][][]=mmul.c;
+      double lb[][][]=mmul.btranspose;
+      int M=mmul.M;
+      int P=mmul.P;
+      //Use btranspose for cache performance
+      for(int q=0;q<P;q++) {
+        double ra[][]=la[q];
+        double rb[][]=lb[q];
+        double rc[][]=lc[q];
+        for(int i = x0; i< x1; i++){
+          double a[]=ra[i];
+          double c[]=rc[i];
+          for (int j = y0; j < y1; j++) {
+            double innerProduct=0;
+            double b[] = rb[j];
+            for(int k = 0; k < M; k++) {
+              innerProduct += a[k] *b[k];
+            }
+            c[j]=innerProduct;
+          }
+        }
+      }
+    }
+  }
+
+  // Benchmark driver.
+  //   args[0] (optional) - number of workers, 1..8 (default 4).
+  //   args[1] (optional) - matrix dimension SIZE (default 150).
+  //   args[2] (optional) - number of independent products (default 1).
+  // The rows of each SIZE x SIZE result are split into one strip per worker.
+  public static void main(String[] args) {
+    int NUM_THREADS = 4;
+    int SIZE=150;
+    int NUM_MATRIX = 1;
+    if (args.length>0) {
+      NUM_THREADS=Integer.parseInt(args[0]);
+      if (args.length>1) {
+        SIZE=Integer.parseInt(args[1]);
+        if (args.length>2)
+          NUM_MATRIX=Integer.parseInt(args[2]);
+      }
+    }
+
+    // One value per worker, handed to start(). Each packs four byte values
+    // as a.b.c.d (here 128.195.136.x) - presumably the address of the
+    // machine that runs the worker; confirm against the runtime.
+    // (Declared once: a second declaration of the same local in this scope
+    // is a redeclaration error in Java.)
+    int[] mid = new int[8];
+    mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dw-10
+    mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dw-11
+    mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dw-12
+    mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dw-13
+    mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dw-14
+    mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dw-15
+    mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dw-16
+    mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dw-17
+
+    // Every worker needs its own entry in mid, and the strip height below
+    // divides by the worker count, so reject counts outside 1..mid.length
+    // before any work is done.
+    if (NUM_THREADS < 1 || NUM_THREADS > mid.length) {
+      System.printString("Error: number of threads must be between 1 and 8\n");
+      return;
+    }
+
+    MatrixMultiply[] mm;
+    MatrixMultiply tmp;
+    MMul matrix;
+
+    atomic {
+      matrix = global new MMul(NUM_MATRIX, SIZE, SIZE, SIZE);
+      matrix.setValues();
+      matrix.transpose();
+      mm = global new MatrixMultiply[NUM_THREADS];
+      // Each worker gets 'increment' rows; the last one also takes whatever
+      // remains when SIZE is not a multiple of NUM_THREADS.
+      int increment=SIZE/NUM_THREADS;
+      int base=0;
+      for(int i=0;i<NUM_THREADS;i++) {
+        if ((i+1)==NUM_THREADS)
+          mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
+        else
+          mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
+        base+=increment;
+      }
+    }
+
+    // start one worker per strip of c
+    for (int i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = mm[i];
+      }
+      tmp.start(mid[i]);
+    }
+
+    // wait for them to finish
+    for (int i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = mm[i];
+      }
+      tmp.join();
+    }
+
+    // The products themselves are not printed.
+    System.printString("Finished\n");
+  }
+}
+
+// Data holder for a batch of P dense matrix multiplications. For each batch
+// index q, the arrays are dimensioned for c[q] = a[q] * b[q]: a[q] is L x M,
+// b[q] is M x N, c[q] is L x N, and btranspose[q] is N x M.
+public class MMul{
+
+ // L, M, N: matrix dimensions; P: number of matrix pairs in the batch.
+ public int L, M, N, P;
+ public double[][][] a;
+ public double[][][] b;
+ public double[][][] c;
+ // Per-batch transposed copy of b, filled in by transpose().
+ public double[][][] btranspose;
+
+ // Records the dimensions and allocates all four batched arrays.
+ // Note that the batch count P is the FIRST parameter.
+ // NOTE(review): "global new" is a construct of this Java dialect; presumably
+ // it places the arrays in the shared (DSM) heap -- confirm against the compiler docs.
+ public MMul(int P, int L, int M, int N) {
+ this.L = L;
+ this.M = M;
+ this.N = N;
+ this.P = P;
+ a = global new double[P][L][M];
+ b = global new double[P][M][N];
+ c = global new double[P][L][N];
+ btranspose = global new double[P][N][M];
+ }
+
+ // Fills every a[q] and b[q] with deterministic data: each row holds
+ // 1, 2, 3, ... (element j is j+1). c and btranspose are not written here.
+ public void setValues() {
+ for(int q = 0; q < P; q++) {
+ for(int i = 0; i < L; i++) {
+ double ai[] = a[q][i];
+ for(int j = 0; j < M; j++) {
+ ai[j] = j+1;
+ }
+ }
+
+ for(int i = 0; i < M; i++) {
+ double bi[] = b[q][i];
+ for(int j = 0; j < N; j++) {
+ bi[j] = j+1;
+ }
+ }
+ }
+ }
+
+ // For every batch index q, copies b[q] into btranspose[q] with rows and
+ // columns swapped: btranspose[q][col][row] = b[q][row][col].
+ public void transpose() {
+ for(int q=0;q<P;q++) {
+ double br[][]=b[q];
+ double bt[][]=btranspose[q];
+ for(int row = 0; row < M; row++) {
+ double brow[] = br[row];
+ for(int col = 0; col < N; col++) {
+ bt[col][row] = brow[col];
+ }
+ }
+ }
+ }
+}
--- /dev/null
+# Builds the DSM MatrixMultiply benchmark binaries from two sources
+# (SRC1 and SRC2), each in two configurations.
+MAINCLASS=MatrixMultiply
+SRC1=${MAINCLASS}N.java
+SRC2=${MAINCLASS}Nrun.java
+# FLAGS:  -dsm build with -dsmcaching and -prefetch enabled.
+# FLAGS1: -dsm build with -dsmcaching only (not used by any rule below).
+# FLAGS2: plain -dsm build.
+FLAGS=-dsm -dsmcaching -prefetch -optimize -excprefetch MatrixMultiply.main -excprefetch MMul.setValues -excprefetch MMul.transpose -mainclass ${MAINCLASS} -trueprob 0.98
+FLAGS1=-dsm -optimize -dsmcaching -mainclass ${MAINCLASS}
+FLAGS2=-dsm -optimize -mainclass ${MAINCLASS}
+
+# Neither target names a file it produces, so keep a stray file called
+# "default" or "clean" from silently disabling the rule.
+.PHONY: default clean
+
+default:
+	../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC ${SRC1}
+	../../../buildscript ${FLAGS} -o ${MAINCLASS}N ${SRC1}
+	../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC200 ${SRC2}
+	../../../buildscript ${FLAGS} -o ${MAINCLASS}N200 ${SRC2}
+
+# -f: cleaning an already-clean directory (no *.bin present) must not fail.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+++ /dev/null
-public class MatrixMultiply extends Thread{
- MMul mmul;
- public int x0, y0, x1, y1;
-
- public MatrixMultiply(MMul mmul, int x0, int y0, int x1, int y1) {
- this.mmul = mmul;
- this.x0 = x0;
- this.y0 = y0;
- this.x1 = x1;
- this.y1 = y1;
- }
-
- public void run() {
- int localresults[][];
-
- //compute the results
- localresults=new int[1+x1-x0][1+y1-y0];
-
- //Use b transpose for cache performance
- for(int i = x0; i<= x1; i++){
- int a[]=mmul.a[i];
- int M=mmul.M;
- for (int j = y0; j <= y1; j++) {
- int innerProduct=0;
- int b[] = mmul.btranspose[j];
- for(int k = 0; k < M; k++) {
- innerProduct += a[k] *b[k];
- }
- localresults[i-x0][j-y0]=innerProduct;
- }
- }
-
- //write the results
- for(int i=x0;i<=x1;i++) {
- int c[]=mmul.c[i];
- for(int j=y0;j<=y1;j++) {
- c[j]=localresults[i-x0][j-y0];
- }
- }
- }
-
- public static void main(String[] args) {
- int NUM_THREADS = 1;
- int p, q, r;
- MatrixMultiply[] mm;
- MatrixMultiply tmp;
- MMul matrix;
-
- matrix = new MMul(400, 400, 400);
- matrix.setValues();
- matrix.transpose();
-
- mm = new MatrixMultiply[NUM_THREADS];
- mm[0] = new MatrixMultiply(matrix,0,0,399,399);
-
- p = matrix.L;
- q = matrix.M;
- r = matrix.N;
-
- // print out the matrices to be multiplied
- System.out.print("MatrixMultiply: L=");
- System.out.print(p);
- System.out.print("\t");
- System.out.print("M=");
- System.out.print(q);
- System.out.print("\t");
- System.out.print("N=");
- System.out.print(r);
- System.out.print("\n");
-
- // start a thread to compute each c[l,n]
- for (int i = 0; i < NUM_THREADS; i++) {
- tmp = mm[i];
- tmp.start();
- }
-
- // wait for them to finish
- for (int i = 0; i < NUM_THREADS; i++) {
- try {
- mm[i].join();
- } catch (InterruptedException e) {
- System.out.println("Join Error");
- }
-
- }
-
- // print out the result of the matrix multiply
- System.out.println("Starting\n");
- System.out.println("Matrix Product c =\n");
- int val;
- for (int i = 0; i < p; i++) {
- int c[]=matrix.c[i];
- for (int j = 0; j < r; j++) {
- val = c[j];
- }
- }
- System.out.println("Finished\n");
- }
-}
-
-class MMul{
-
- public int L, M, N;
- public int[][] a;
- public int[][] b;
- public int[][] c;
- public int[][] btranspose;
-
- public MMul(int L, int M, int N) {
- this.L = L;
- this.M = M;
- this.N = N;
- a = new int[L][M];
- b = new int[M][N];
- c = new int[L][N];
- btranspose = new int[N][M];
- }
-
- public void setValues() {
- for(int i = 0; i < L; i++) {
- int ai[] = a[i];
- for(int j = 0; j < M; j++) {
- ai[j] = j+1;
- }
- }
-
- for(int i = 0; i < M; i++) {
- int bi[] = b[i];
- for(int j = 0; j < N; j++) {
- bi[j] = j+1;
- }
- }
-
- for(int i = 0; i < L; i++) {
- int ci[] = c[i];
- for(int j = 0; j < N; j++) {
- ci[j] = 0;
- }
- }
- for(int i = 0; i < N; i++) {
- int btransposei[] = btranspose[i];
- for(int j = 0; j < M; j++) {
- btransposei[j] = 0;
- }
- }
- }
-
- public void transpose() {
- for(int row = 0; row < M; row++) {
- int brow[] = b[row];
- for(int col = 0; col < N; col++) {
- btranspose[col][row] = brow[col];
- }
- }
- }
-}
+++ /dev/null
-MAINCLASS=MatrixMultiply
-SRC=${MAINCLASS}N.java
-default:
- javac ${SRC}
-run:
- java ${MAINCLASS}
-
-clean:
- rm *.class
--- /dev/null
+// Worker thread for an integer matrix multiplication. Each instance computes
+// the block of c spanning rows x0..x1 and columns y0..y1, both bounds INCLUSIVE.
+public class MatrixMultiply extends Thread{
+ MMul mmul;
+ public int x0, y0, x1, y1;
+
+ // (x0, y0) is the first row/column of the block and (x1, y1) the last
+ // row/column; all four bounds are inclusive (see the <= loops in run()).
+ public MatrixMultiply(MMul mmul, int x0, int y0, int x1, int y1) {
+ this.mmul = mmul;
+ this.x0 = x0;
+ this.y0 = y0;
+ this.x1 = x1;
+ this.y1 = y1;
+ }
+
+ // Computes the block in two phases: first into a thread-local buffer,
+ // then copied into the shared result matrix mmul.c.
+ public void run() {
+ int localresults[][];
+
+ //compute the results
+ // +1 because the bounds are inclusive
+ localresults=new int[1+x1-x0][1+y1-y0];
+
+ //Use b transpose for cache performance
+ for(int i = x0; i<= x1; i++){
+ int a[]=mmul.a[i];
+ int M=mmul.M;
+ for (int j = y0; j <= y1; j++) {
+ int innerProduct=0;
+ // Row j of btranspose is column j of b
+ int b[] = mmul.btranspose[j];
+ for(int k = 0; k < M; k++) {
+ innerProduct += a[k] *b[k];
+ }
+ localresults[i-x0][j-y0]=innerProduct;
+ }
+ }
+
+ //write the results
+ for(int i=x0;i<=x1;i++) {
+ int c[]=mmul.c[i];
+ for(int j=y0;j<=y1;j++) {
+ c[j]=localresults[i-x0][j-y0];
+ }
+ }
+ }
+
+ // Benchmark driver: multiplies two fixed 400 x 400 matrices using a single
+ // worker thread that covers the whole result (rows and columns 0..399).
+ // Command-line arguments are ignored.
+ public static void main(String[] args) {
+ int NUM_THREADS = 1;
+ int p, q, r;
+ MatrixMultiply[] mm;
+ MatrixMultiply tmp;
+ MMul matrix;
+
+ matrix = new MMul(400, 400, 400);
+ matrix.setValues();
+ matrix.transpose();
+
+ mm = new MatrixMultiply[NUM_THREADS];
+ // Only slot 0 is filled; this matches NUM_THREADS == 1 above.
+ mm[0] = new MatrixMultiply(matrix,0,0,399,399);
+
+ p = matrix.L;
+ q = matrix.M;
+ r = matrix.N;
+
+ // print out the dimensions of the matrices to be multiplied
+ System.out.print("MatrixMultiply: L=");
+ System.out.print(p);
+ System.out.print("\t");
+ System.out.print("M=");
+ System.out.print(q);
+ System.out.print("\t");
+ System.out.print("N=");
+ System.out.print(r);
+ System.out.print("\n");
+
+ // start a thread to compute each c[l,n]
+ for (int i = 0; i < NUM_THREADS; i++) {
+ tmp = mm[i];
+ tmp.start();
+ }
+
+ // wait for them to finish
+ for (int i = 0; i < NUM_THREADS; i++) {
+ try {
+ mm[i].join();
+ } catch (InterruptedException e) {
+ System.out.println("Join Error");
+ }
+
+ }
+
+ // Walk over the result of the matrix multiply. The values are only read
+ // into val and never printed, so the output consists of the three marker
+ // lines below ("Starting" is emitted after the workers have been joined).
+ System.out.println("Starting\n");
+ System.out.println("Matrix Product c =\n");
+ int val;
+ for (int i = 0; i < p; i++) {
+ int c[]=matrix.c[i];
+ for (int j = 0; j < r; j++) {
+ val = c[j];
+ }
+ }
+ System.out.println("Finished\n");
+ }
+}
+
+// Data holder for one integer matrix multiplication. The arrays are
+// dimensioned for a product c = a * b: a is L x M, b is M x N, c is L x N,
+// and btranspose is N x M.
+class MMul{
+
+ // Matrix dimensions (see the class comment for how each array uses them).
+ public int L, M, N;
+ public int[][] a;
+ public int[][] b;
+ public int[][] c;
+ // Transposed copy of b, filled in by transpose().
+ public int[][] btranspose;
+
+ // Records the dimensions and allocates all four matrices.
+ public MMul(int L, int M, int N) {
+ this.L = L;
+ this.M = M;
+ this.N = N;
+ a = new int[L][M];
+ b = new int[M][N];
+ c = new int[L][N];
+ btranspose = new int[N][M];
+ }
+
+ // Fills the inputs with deterministic data and clears the outputs:
+ // every row of a and of b holds 1, 2, 3, ... (element j is j+1),
+ // while c and btranspose are set to 0.
+ public void setValues() {
+ for(int i = 0; i < L; i++) {
+ int ai[] = a[i];
+ for(int j = 0; j < M; j++) {
+ ai[j] = j+1;
+ }
+ }
+
+ for(int i = 0; i < M; i++) {
+ int bi[] = b[i];
+ for(int j = 0; j < N; j++) {
+ bi[j] = j+1;
+ }
+ }
+
+ for(int i = 0; i < L; i++) {
+ int ci[] = c[i];
+ for(int j = 0; j < N; j++) {
+ ci[j] = 0;
+ }
+ }
+ for(int i = 0; i < N; i++) {
+ int btransposei[] = btranspose[i];
+ for(int j = 0; j < M; j++) {
+ btransposei[j] = 0;
+ }
+ }
+ }
+
+ // Copies b into btranspose with rows and columns swapped:
+ // btranspose[col][row] = b[row][col].
+ public void transpose() {
+ for(int row = 0; row < M; row++) {
+ int brow[] = b[row];
+ for(int col = 0; col < N; col++) {
+ btranspose[col][row] = brow[col];
+ }
+ }
+ }
+}
--- /dev/null
+# Builds the threaded (non-DSM) MatrixMultiply benchmark binary.
+MAINCLASS=MatrixMultiply
+SRC=${MAINCLASS}.java
+
+# Neither target names a file it produces, so keep a stray file called
+# "default" or "clean" from silently disabling the rule.
+.PHONY: default clean
+
+default:
+	../../../../buildscript -optimize -thread -debug -mainclass ${MAINCLASS} ${SRC} -o ${MAINCLASS}
+
+# -f: cleaning an already-clean directory (no *.bin present) must not fail.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+++ /dev/null
-MAINCLASS=MatrixMultiply
-SRC1=${MAINCLASS}N.java
-SRC2=${MAINCLASS}Nrun.java
-FLAGS=-dsm -dsmcaching -prefetch -optimize -excprefetch MatrixMultiply.main -excprefetch MMul.setValues -excprefetch MMul.transpose -mainclass ${MAINCLASS} -trueprob 0.98
-FLAGS1=-dsm -optimize -dsmcaching -mainclass ${MAINCLASS}
-FLAGS2=-dsm -optimize -mainclass ${MAINCLASS}
-default:
- ../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC ${SRC1}
-# ../../../buildscript ${FLAGS1} -o ${MAINCLASS}NNP ${SRC1}
- ../../../buildscript ${FLAGS} -o ${MAINCLASS}N ${SRC1}
- ../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC200 ${SRC2}
-# ../../../buildscript ${FLAGS1} -o ${MAINCLASS}NNP200 ${SRC2}
- ../../../buildscript ${FLAGS} -o ${MAINCLASS}N200 ${SRC2}
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}1NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}1NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}1.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}2NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}2NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}2.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}3NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}3NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}3.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}4NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}4NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}4.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}5NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}5NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}5.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}6NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}6NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}6.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}7NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}7NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}7.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}8NPNC.bin
-# cp ${MAINCLASS}NNP.bin ${MAINCLASS}8NP.bin
- cp ${MAINCLASS}N.bin ${MAINCLASS}8.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}1NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}1NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}1.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}2NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}2NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}2.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}3NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}3NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}3.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}4NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}4NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}4.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}5NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}5NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}5.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}6NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}6NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}6.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}7NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}7NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}7.bin
- cp ${MAINCLASS}NPNC200.bin ../MM200/${MAINCLASS}8NPNC.bin
-# cp ${MAINCLASS}NNP200.bin ../MM200/${MAINCLASS}8NP.bin
- cp ${MAINCLASS}N200.bin ../MM200/${MAINCLASS}8.bin
-
-clean:
- rm -rf tmpbuilddirectory
- rm *.bin
default:
../../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC ${SRC}
-# ../../../../buildscript ${FLAGS1} -o ${MAINCLASS}NP ${SRC}
- ../../../../buildscript ${FLAGS} -o ${MAINCLASS}P ${SRC}
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}1NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}1NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}1.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}2NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}2NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}2.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}3NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}3NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}3.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}4NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}4NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}4.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}5NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}5NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}5.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}6NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}6NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}6.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}7NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}7NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}7.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}8NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}8NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}8.bin
+ ../../../../buildscript ${FLAGS} -o ${MAINCLASS}N ${SRC}
clean:
rm -rf tmpbuilddirectory/
+++ /dev/null
-public class Barrier {
- int numthreads;
- int entercount;
- boolean cleared;
-
- public Barrier(int n) {
- numthreads=n;
- cleared = false;
- }
-
- public Barrier() {
-
- }
-
- public void reset() {
- cleared = false;
- entercount = 0;
- }
-
- public static void enterBarrier(Barrier b) {
- int tmp;
- boolean retry=true;
-
- do {
- if (!b.cleared) {
- b.entercount++;
- tmp = b.entercount;
- if (tmp==b.numthreads) {
- if(b.numthreads > 1)
- b.cleared=true;
- b.entercount--;
- return;
- }
- retry=false;
- }
- } while(retry);
-
- while(true) {
- if (b.cleared) {
- b.entercount--;
- int count = b.entercount;
- if (count==0)
- b.cleared=false;
- return;
- }
- }
- }
-}
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 1999. *
- * All rights reserved. *
- * *
- **************************************************************************/
-public class JGFInstrumentor{
-
- protected HashMap timers;
- protected HashMap data;
-
- public JGFInstrumentor() {
- timers = new HashMap();
- data = new HashMap();
- }
-
- public static void addTimer (String name, HashMap timers){
-
- if (timers.containsKey(name)) {
- System.printString("JGFInstrumentor.addTimer: warning - timer " + name +
- " already exists\n");
- }
- else {
- timers.put(name, new JGFTimer(name));
- }
- }
-
- public static void addTimer (String name, String opname, HashMap timers){
-
- if (timers.containsKey(name)) {
- System.printString("JGFInstrumentor.addTimer: warning - timer " + name +
- " already exists\n");
- }
- else {
- timers.put(name, new JGFTimer(name,opname));
- }
-
- }
-
- public static void addTimer (String name, String opname, int size, HashMap timers){
-
- if (timers.containsKey(name)) {
- System.printString("JGFInstrumentor.addTimer: warning - timer " + name +
- " already exists\n");
- }
- else {
- timers.put(name, new JGFTimer(name,opname,size));
- }
-
- }
-
- public static void startTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).start();
- }
- else {
- System.printString("JGFInstrumentor.startTimer: failed - timer " + name +
- " does not exist\n");
- }
-
- }
-
- public static void stopTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).stop();
- }
- else {
- System.printString("JGFInstrumentor.stopTimer: failed - timer " + name +
- " does not exist\n");
- }
- }
-
- public static void addOpsToTimer(String name, double count, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).addops(count);
- }
- else {
- System.printString("JGFInstrumentor.addOpsToTimer: failed - timer " + name +
- " does not exist\n");
- }
- }
-
- public static void addTimeToTimer(String name, double added_time, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).addtime(added_time);
- }
- else {
- System.printString("JGFInstrumentor.addTimeToTimer: failed - timer " + name +
- " does not exist\n");
- }
-
-
-
- }
-
- public static double readTimer(String name, HashMap timers){
- double time;
- if (timers.containsKey(name)) {
- time = ((JGFTimer) timers.get(name)).time;
- }
- else {
- System.printString("JGFInstrumentor.readTimer: failed - timer " + name +
- " does not exist\n");
- time = 0.0;
- }
- return time;
- }
-
- public static void resetTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).reset();
- }
- else {
- System.printString("JGFInstrumentor.resetTimer: failed - timer " + name +
- " does not exist\n");
- }
- }
-
- public static void printTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).print();
- }
- else {
- System.printString("JGFInstrumentor.printTimer: failed - timer " + name +
- " does not exist\n");
- }
- }
-
- public static void printperfTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).printperf();
- }
- else {
- System.printString("JGFInstrumentor.printTimer: failed - timer " + name +
- " does not exist\n");
- }
- }
-
- public static void storeData(String name, Object obj, HashMap data){
- data.put(name,obj);
- }
-
- public static void retrieveData(String name, Object obj, HashMap data){
- obj = data.get(name);
- }
-
- public static void printHeader(int section, int size,int nthreads) {
-
- String header, base;
-
- header = "";
- base = "Java Grande Forum Thread Benchmark Suite - Version 1.0 - Section ";
-
- if (section == 1)
- {
- header = base + "1";
- }
- else if (section == 2)
- {
- if (size == 0)
- header = base + "2 - Size A";
- else if (size == 1)
- header = base + "2 - Size B";
- else if (size == 2)
- header = base + "2 - Size C";
- }
- else if (section == 3)
- {
- if (size == 0)
- header = base + "3 - Size A";
- else if (size == 1)
- header = base + "3 - Size B";
- }
-
- System.printString(header + "\n");
-
- if (nthreads == 1) {
- System.printString("Executing on " + nthreads + " thread\n");
- }
- else {
- System.printString("Executing on " + nthreads + " threads\n");
- }
-
- System.printString(" \n");
- }
-}
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 2001. *
- * All rights reserved. *
- * *
- **************************************************************************/
-public class JGFMolDynBench {
- public int ITERS;
- public double LENGTH;
- public double m;
- public double mu;
- public double kb;
- public double TSIM;
- public double deltat;
-
- public int PARTSIZE;
-
- public double[] epot;
- public double[] vir;
- public double[] ek;
-
- int size,mm;
- int[] datasizes;
-
- public int interactions;
- public int[] interacts;
-
- public int nthreads;
- public JGFInstrumentor instr;
-
- public JGFMolDynBench(int nthreads) {
- this.nthreads=nthreads;
- }
-
- public void JGFsetsize(int size){
- this.size = size;
- }
-
- public void JGFinitialise(){
- interactions = 0;
- datasizes = new int[2];
- datasizes[0] = 8;
- datasizes[1] = 13;
-
- mm = datasizes[size];
- PARTSIZE = mm*mm*mm*4;
- ITERS = 100;
- LENGTH = 50e-10;
- m = 4.0026;
- mu = 1.66056e-27;
- kb = 1.38066e-23;
- TSIM = 50;
- deltat = 5e-16;
- }
-
- public static void JGFapplication(JGFMolDynBench mold) {
- // Create new arrays
- mold.epot = new double [mold.nthreads];
- mold.vir = new double [mold.nthreads];
- mold.ek = new double [mold.nthreads];
- mold.interacts = new int [mold.nthreads];
-
- int partsize, numthreads;
- partsize = mold.PARTSIZE;
- numthreads = mold.nthreads;
-
- double sh_force [][];
- double sh_force2 [][][];
- sh_force = new double[3][partsize];
- sh_force2 = new double[3][numthreads][partsize];
-
- // spawn threads
- mdRunner[] thobjects;
- Barrier br;
- thobjects = new mdRunner[numthreads];
- br= new Barrier(numthreads);
-
- int[] mid = new int[2];
- mid[0] = (128<<24)|(195<<16)|(175<<8)|73;
- mid[1] = (128<<24)|(195<<16)|(175<<8)|69;
- mdRunner tmp;
-
- for(int i=1;i<numthreads;i++) {
- thobjects[i] = new mdRunner(i,mold.mm,sh_force,sh_force2,br,mold.nthreads,mold);
- tmp = thobjects[i];
- //System.printString("Starting thread "+ i + "\n");
- tmp.start();
- }
- //System.printString("Finished starting rest threads\n");
-
- thobjects[0] = new mdRunner(0,mold.mm,sh_force,sh_force2,br,mold.nthreads,mold);
- tmp = thobjects[0];
- //System.printString("Starting thread 0\n");
- tmp.start();
- tmp.join();
- //System.printString("Finishing start\n");
-
- for(int i=1;i<numthreads;i++) {
- //System.printString("Joining thread "+ i + "\n");
- tmp = thobjects[i];
- tmp.join();
- }
- //System.printString("Finished joining all threads\n");
- }
-
- public void JGFvalidate(){
- double[] refval = new double[2];
- refval[0] = 1731.4306625334357;
- refval[1] = 7397.392307839352;
- double dev = Math.fabs(ek[0] - refval[size]);
- if (dev > 1.0e-10 ){
- //System.printString("Validation failed\n");
- //System.printString("Kinetic Energy = " + (long)ek[0] + " " + (long)dev + " " + size + "\n");
- }
- }
-}
-
-class mdRunner extends Thread {
-
- double count;
- int id,i,j,k,lg,mdsize,mm;
- double l,rcoff,rcoffs,side,sideh,hsq,hsq2,vel,velt;
- double a,r,sum,tscale,sc,ekin,ts,sp;
- double den;
- double tref;
- double h;
- double vaver,vaverh,rand;
- double etot,temp,pres,rp;
- double u1,u2,v1,v2,s, xx, yy, zz;
- double xvelocity, yvelocity, zvelocity;
-
- double [][] sh_force;
- double [][][] sh_force2;
-
- int ijk,npartm,iseed,tint;
- int irep;
- int istop;
- int iprint;
-
- Barrier br;
- random randnum;
- JGFMolDynBench mymd;
- int nthreads;
-
- particle[] one;
-
- public mdRunner(int id, int mm, double [][] sh_force, double [][][] sh_force2,Barrier br,
- int nthreads, JGFMolDynBench mymd) {
- this.id=id;
- this.mm=mm;
- this.sh_force=sh_force;
- this.sh_force2=sh_force2;
- this.br=br;
- this.nthreads = nthreads;
- this.mymd = mymd;
- count = 0.0;
- den = 0.83134;
- tref = 0.722;
- h = 0.064;
- irep = 10;
- istop = 19;
- iprint = 10;
- }
-
- public void run() {
-
- //System.printString("Start run method\n");
-
- /* Parameter determination */
-
- int tmpmdsize;
- double tmpden;
- int movemx=50;
- Barrier tmpbr;
-
- tmpbr=br;
- mdsize = mymd.PARTSIZE;
- one = new particle[mdsize];
- l = mymd.LENGTH;
- tmpmdsize = mdsize;
- tmpden = den;
- side = Math.pow((tmpmdsize/tmpden),0.3333333);
- rcoff = mm/4.0;
-
- a = side/mm;
- sideh = side*0.5;
- hsq = h*h;
- hsq2 = hsq*0.5;
- npartm = tmpmdsize - 1;
- rcoffs = rcoff * rcoff;
- tscale = 16.0 / (1.0 * tmpmdsize - 1.0);
- vaver = 1.13 * Math.sqrt(tref / 24.0);
- vaverh = vaver * h;
-
- /* Particle Generation */
-
- xvelocity = 0.0;
- yvelocity = 0.0;
- zvelocity = 0.0;
- ijk = 0;
-
- for (lg=0; lg<=1; lg++) {
- for (i=0; i<mm; i++) {
- for (j=0; j<mm; j++) {
- for (k=0; k<mm; k++) {
- one[ijk] = new particle((i*a+lg*a*0.5),(j*a+lg*a*0.5),(k*a),
- xvelocity,yvelocity,zvelocity,sh_force,sh_force2,id,this);
- ijk = ijk + 1;
- }
- }
- }
- }
-
- for (lg=1; lg<=2; lg++) {
- for (i=0; i<mm; i++) {
- for (j=0; j<mm; j++) {
- for (k=0; k<mm; k++) {
- one[ijk] = new particle((i*a+(2-lg)*a*0.5),(j*a+(lg-1)*a*0.5),
- (k*a+a*0.5),xvelocity,yvelocity,zvelocity,sh_force,sh_force2,id,this);
- ijk = ijk + 1;
- }
- }
- }
- }
-
- /* Initialise velocities */
-
- iseed = 0;
- v1 = 0.0;
- v2 = 0.0;
- randnum = new random(iseed,v1,v2);
-
- for (i=0; i<tmpmdsize; i+=2) {
- r = randnum.seed();
- one[i].xvelocity = r*randnum.v1;
- one[i+1].xvelocity = r*randnum.v2;
- }
-
- for (i=0; i<tmpmdsize; i+=2) {
- r = randnum.seed();
- one[i].yvelocity = r*randnum.v1;
- one[i+1].yvelocity = r*randnum.v2;
- }
-
- for (i=0; i<tmpmdsize; i+=2) {
- r = randnum.seed();
- one[i].zvelocity = r*randnum.v1;
- one[i+1].zvelocity = r*randnum.v2;
- }
-
-
- /* velocity scaling */
-
- ekin = 0.0;
- sp = 0.0;
-
- for(i=0;i<tmpmdsize;i++) {
- sp = sp + one[i].xvelocity;
- }
- sp = sp / tmpmdsize;
-
- for(i=0;i<tmpmdsize;i++) {
- one[i].xvelocity = one[i].xvelocity - sp;
- ekin = ekin + one[i].xvelocity*one[i].xvelocity;
- }
-
- sp = 0.0;
- for(i=0;i<tmpmdsize;i++) {
- sp = sp + one[i].yvelocity;
- }
- sp = sp / tmpmdsize;
-
- for(i=0;i<tmpmdsize;i++) {
- one[i].yvelocity = one[i].yvelocity - sp;
- ekin = ekin + one[i].yvelocity*one[i].yvelocity;
- }
-
-
- sp = 0.0;
- for(i=0;i<tmpmdsize;i++) {
- sp = sp + one[i].zvelocity;
- }
- sp = sp / tmpmdsize;
-
- for(i=0;i<tmpmdsize;i++) {
- one[i].zvelocity = one[i].zvelocity - sp;
- ekin = ekin + one[i].zvelocity*one[i].zvelocity;
- }
-
- ts = tscale * ekin;
- sc = h * Math.sqrt(tref/ts);
-
-
- for(i=0;i<tmpmdsize;i++) {
-
- one[i].xvelocity = one[i].xvelocity * sc;
- one[i].yvelocity = one[i].yvelocity * sc;
- one[i].zvelocity = one[i].zvelocity * sc;
-
- }
-
- /* Synchronise threads and start timer before MD simulation */
-
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //int myid;
- //atomic {
- // myid = id;
- //}
- //TournamentBarrier.enterBarrier(myid, tmpbr);
- //if (id == 0) JGFInstrumentor.startTimer("Section3:MolDyn:Run", instr.timers);
- //Barrier.enterBarrier(tmpbr);
-
- /* MD simulation */
-
- for (int move=0;move<movemx;move++) {
- /* move the particles and update velocities */
-
- for (i=0;i<tmpmdsize;i++) {
- one[i].domove(side,i);
- }
-
- /* Barrier */
- //System.printString("Barrier #2\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
-
- if(id==0) {
- for(j=0;j<3;j++) {
- for (i=0;i<tmpmdsize;i++) {
- sh_force[j][i] = 0.0;
- }
- }
- }
-
- mymd.epot[id] = 0.0;
- mymd.vir[id] = 0.0;
- mymd.interacts[id] = 0;
-
-
- /* Barrier */
- //System.printString("Barrier #3\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
-
- /* compute forces */
-
- for (i=0+id;i<tmpmdsize;i+=nthreads) {
- one[i].force(side,rcoff,tmpmdsize,i,xx,yy,zz,mymd);
- }
-
- /* Barrier */
- //System.printString("Barrier #4\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
-
- /* update force arrays */
- if(id == 0) {
- for(int k=0;k<3;k++) {
- for(i=0;i<tmpmdsize;i++) {
- for(j=0;j<nthreads;j++) {
- sh_force[k][i] += sh_force2[k][j][i];
- }
- }
- }
- }
-
- if(id == 0) {
- for(int k=0;k<3;k++) {
- for(i=0;i<tmpmdsize;i++) {
- for(j=0;j<nthreads;j++) {
- sh_force2[k][j][i] = 0.0;
- }
- }
- }
- }
-
- if(id==0) {
- for(j=1;j<nthreads;j++) {
- mymd.epot[0] += mymd.epot[j];
- mymd.vir[0] += mymd.vir[j];
- }
- for(j=1;j<nthreads;j++) {
- mymd.epot[j] = mymd.epot[0];
- mymd.vir[j] = mymd.vir[0];
- }
- for(j=0;j<nthreads;j++) {
- mymd.interactions += mymd.interacts[j];
- }
- }
-
- /* Barrier */
- //System.printString("Barrier #5\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
-
- if(id == 0) {
- for (j=0;j<3;j++) {
- for (i=0;i<tmpmdsize;i++) {
- sh_force[j][i] = sh_force[j][i] * hsq2;
- }
- }
- }
-
- sum = 0.0;
-
-
- /* Barrier */
- //System.printString("Barrier #6\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
-
- /*scale forces, update velocities */
-
- for (i=0;i<tmpmdsize;i++) {
- sum = sum + one[i].mkekin(hsq2,i);
- }
-
- ekin = sum/hsq;
-
- vel = 0.0;
- count = 0.0;
-
- /* average velocity */
-
- for (i=0;i<tmpmdsize;i++) {
- velt = one[i].velavg(vaverh,h);
- if(velt > vaverh) { count = count + 1.0; }
- vel = vel + velt;
- }
-
- vel = vel / h;
-
- /* temperature scale if required */
-
- if((move < istop) && (((move+1) % irep) == 0)) {
- sc = Math.sqrt(tref / (tscale*ekin));
- for (i=0;i<tmpmdsize;i++) {
- one[i].dscal(sc,1);
- }
- ekin = tref / tscale;
- }
-
- /* sum to get full potential energy and virial */
-
- if(((move+1) % iprint) == 0) {
- mymd.ek[id] = 24.0*ekin;
- mymd.epot[id] = 4.0*mymd.epot[id];
- etot = mymd.ek[id] + mymd.epot[id];
- temp = tscale * ekin;
- pres = tmpden * 16.0 * (ekin - mymd.vir[id]) / tmpmdsize;
- vel = vel / tmpmdsize;
- rp = (count / tmpmdsize) * 100.0;
- }
- //System.printString("Barrier #7\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
- }
-
- //System.printString("Barrier #8\n");
- Barrier.enterBarrier(tmpbr);
- //System.clearPrefetchCache();
- //TournamentBarrier.enterBarrier(myid, tmpbr);
- //if (id == 0) JGFInstrumentor.stopTimer("Section3:MolDyn:Run", instr.timers);
- //System.printString("End run method\n");
- }
-
-}
-
-
-
-
-class particle {
-
- public double xcoord, ycoord, zcoord;
- public double xvelocity,yvelocity,zvelocity;
- int part_id;
- int id;
- double [][] sh_force;
- double [][][] sh_force2;
- mdRunner runner;
-
- public particle(double xcoord, double ycoord, double zcoord, double xvelocity,
- double yvelocity,double zvelocity,double [][] sh_force,
- double [][][] sh_force2,int id,mdRunner runner) {
-
- this.xcoord = xcoord;
- this.ycoord = ycoord;
- this.zcoord = zcoord;
- this.xvelocity = xvelocity;
- this.yvelocity = yvelocity;
- this.zvelocity = zvelocity;
- this.sh_force = sh_force;
- this.sh_force2 = sh_force2;
- this.id=id;
- this.runner=runner;
- }
-
- public void domove(double side,int part_id) {
-
- xcoord = xcoord + xvelocity + sh_force[0][part_id];
- ycoord = ycoord + yvelocity + sh_force[1][part_id];
- zcoord = zcoord + zvelocity + sh_force[2][part_id];
-
- if(xcoord < 0) { xcoord = xcoord + side; }
- if(xcoord > side) { xcoord = xcoord - side; }
- if(ycoord < 0) { ycoord = ycoord + side; }
- if(ycoord > side) { ycoord = ycoord - side; }
- if(zcoord < 0) { zcoord = zcoord + side; }
- if(zcoord > side) { zcoord = zcoord - side; }
-
- xvelocity = xvelocity + sh_force[0][part_id];
- yvelocity = yvelocity + sh_force[1][part_id];
- zvelocity = zvelocity + sh_force[2][part_id];
-
- }
-
- public void force(double side, double rcoff,int mdsize,int x, double xx, double yy, double zz, JGFMolDynBench mymd) {
-
- double sideh;
- double rcoffs;
-
- double fxi,fyi,fzi;
- double rd,rrd,rrd2,rrd3,rrd4,rrd6,rrd7,r148;
- double forcex,forcey,forcez;
-
- sideh = 0.5*side;
- rcoffs = rcoff*rcoff;
-
- fxi = 0.0;
- fyi = 0.0;
- fzi = 0.0;
-
- for (int i=x+1;i<mdsize;i++) {
- xx = this.xcoord - runner.one[i].xcoord;
- yy = this.ycoord - runner.one[i].ycoord;
- zz = this.zcoord - runner.one[i].zcoord;
-
- if(xx < (-sideh)) { xx = xx + side; }
- if(xx > (sideh)) { xx = xx - side; }
- if(yy < (-sideh)) { yy = yy + side; }
- if(yy > (sideh)) { yy = yy - side; }
- if(zz < (-sideh)) { zz = zz + side; }
- if(zz > (sideh)) { zz = zz - side; }
-
-
- rd = xx*xx + yy*yy + zz*zz;
-
- if(rd <= rcoffs) {
- rrd = 1.0/rd;
- rrd2 = rrd*rrd;
- rrd3 = rrd2*rrd;
- rrd4 = rrd2*rrd2;
- rrd6 = rrd2*rrd4;
- rrd7 = rrd6*rrd;
- mymd.epot[id] = mymd.epot[id] + (rrd6 - rrd3);
- r148 = rrd7 - 0.5*rrd4;
- mymd.vir[id] = mymd.vir[id] - rd*r148;
- forcex = xx * r148;
- fxi = fxi + forcex;
-
- sh_force2[0][id][i] = sh_force2[0][id][i] - forcex;
-
- forcey = yy * r148;
- fyi = fyi + forcey;
-
- sh_force2[1][id][i] = sh_force2[1][id][i] - forcey;
-
- forcez = zz * r148;
- fzi = fzi + forcez;
-
- sh_force2[2][id][i] = sh_force2[2][id][i] - forcez;
-
- mymd.interacts[id]++;
- }
-
- }
-
- sh_force2[0][id][x] = sh_force2[0][id][x] + fxi;
- sh_force2[1][id][x] = sh_force2[1][id][x] + fyi;
- sh_force2[2][id][x] = sh_force2[2][id][x] + fzi;
-
- }
-
- public double mkekin(double hsq2,int part_id) {
-
- double sumt = 0.0;
-
- xvelocity = xvelocity + sh_force[0][part_id];
- yvelocity = yvelocity + sh_force[1][part_id];
- zvelocity = zvelocity + sh_force[2][part_id];
-
- sumt = (xvelocity*xvelocity)+(yvelocity*yvelocity)+(zvelocity*zvelocity);
- return sumt;
- }
-
- public double velavg(double vaverh,double h) {
-
- double velt;
- double sq;
-
- sq = Math.sqrt(xvelocity*xvelocity + yvelocity*yvelocity +
- zvelocity*zvelocity);
-
- velt = sq;
- return velt;
- }
-
- public void dscal(double sc,int incx) {
-
- xvelocity = xvelocity * sc;
- yvelocity = yvelocity * sc;
- zvelocity = zvelocity * sc;
-
-
-
- }
-
-}
-
-class random {
-
- public int iseed;
- public double v1,v2;
-
- public random(int iseed,double v1,double v2) {
- this.iseed = iseed;
- this.v1 = v1;
- this.v2 = v2;
- }
-
- public double update() {
-
- double rand;
- double scale= 4.656612875e-10;
-
- int is1,is2,iss2;
- int imult=16807;
- int imod = 2147483647;
-
- if (iseed<=0) { iseed = 1; }
-
- is2 = iseed % 32768;
- is1 = (iseed-is2)/32768;
- iss2 = is2 * imult;
- is2 = iss2 % 32768;
- is1 = (is1*imult+(iss2-is2)/32768) % (65536);
-
- iseed = (is1*32768+is2) % imod;
-
- rand = scale * iseed;
-
- return rand;
-
- }
-
- public double seed() {
-
- double s,u1,u2,r;
- s = 1.0;
- do {
- u1 = update();
- u2 = update();
-
- v1 = 2.0 * u1 - 1.0;
- v2 = 2.0 * u2 - 1.0;
- s = v1*v1 + v2*v2;
-
- } while (s >= 1.0);
-
- r = Math.sqrt(-2.0*Math.log(s)/s);
-
- return r;
-
- }
-}
-
-
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 2001. *
- * All rights reserved. *
- * *
- **************************************************************************/
-public class JGFMolDynBenchSizeA {
-
- public static void main(String argv[]){
- int nthreads;
- if(argv.length != 0 ) {
- nthreads = Integer.parseInt(argv[0]);
- } else {
- System.printString("The no of threads has not been specified, defaulting to 1\n");
- System.printString(" " + "\n");
- nthreads = 1;
- }
-
- JGFInstrumentor instr = new JGFInstrumentor();
- JGFInstrumentor.printHeader(3,0,nthreads);
-
- JGFMolDynBench mold;
- mold = new JGFMolDynBench(nthreads);
- int size = 0;
- JGFInstrumentor.addTimer("Section3:MolDyn:Total", "Solutions",size, instr.timers);
- JGFInstrumentor.addTimer("Section3:MolDyn:Run", "Interactions",size, instr.timers);
-
- mold.JGFsetsize(size);
-
- JGFInstrumentor.startTimer("Section3:MolDyn:Total", instr.timers);
-
- JGFMolDynBench tmp;
- mold.JGFinitialise();
- JGFMolDynBench.JGFapplication(mold);
- /* Validate data */
- double[] refval = new double[2];
- refval[0] = 1731.4306625334357;
- refval[1] = 7397.392307839352;
- double dval;
- //System.printString("Here #1\n");
- dval = mold.ek[0];
- //System.printString("Here #2\n");
- double dev = Math.fabs(dval - refval[size]);
- //long ldev = (long)dev * 1000000;
- //System.printString("ldev= "+ldev);
- //long ltmp = (long)1.0e-10 * 1000000;
- //System.printString("ltmp= "+ltmp);
- if (dev > 1.0e-10 ){
- //if (ldev > ltmp ){
- System.printString("Validation failed\n");
- System.printString("Kinetic Energy = " + (long)dval + " " + (long)dev + " " + size + "\n");
- }
- System.printString("End of JGFvalidate\n");
-
- JGFInstrumentor.stopTimer("Section3:MolDyn:Total", instr.timers);
- double interactions;
- System.printString("Here #3\n");
- interactions = mold.interactions;
- System.printString("Here #4\n");
-
- JGFInstrumentor.addOpsToTimer("Section3:MolDyn:Run", (double) interactions, instr.timers);
- JGFInstrumentor.addOpsToTimer("Section3:MolDyn:Total", 1, instr.timers);
-
- JGFInstrumentor.printTimer("Section3:MolDyn:Run", instr.timers);
- JGFInstrumentor.printTimer("Section3:MolDyn:Total", instr.timers);
- }
-}
-
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 1999. *
- * All rights reserved. *
- * *
- **************************************************************************/
-
-public class JGFTimer {
-
- public String name;
- public String opname;
- public double time;
- public double opcount;
- public long calls;
- public int size;
-
- private long start_time;
- private boolean on;
-
- public JGFTimer(String name, String opname){
- this.size = -1;
- this.name = name;
- this.opname = opname;
- reset();
- }
-
- public JGFTimer(String name, String opname, int size){
- this.name = name;
- this.opname = opname;
- this.size = size;
- reset();
- }
-
- public JGFTimer(String name){
- this.name = name;
- this.opname = "";
- reset();
- }
-
-
-
- public void start(){
- if (on) System.printString("Warning timer " + " was already turned on\n");
- on = true;
- start_time = System.currentTimeMillis();
- }
-
-
- public void stop(){
- time += (double) (System.currentTimeMillis()-start_time) / 1000.;
- if (!on) System.printString("Warning timer " + " wasn't turned on\n");
- calls++;
- on = false;
- }
-
- public void addops(double count){
- opcount += count;
- }
-
- public void addtime(double added_time){
- time += added_time;
- }
-
- public void reset(){
- time = 0.0;
- calls = 0;
- opcount = 0;
- on = false;
- }
-
- public double perf(){
- return opcount / time;
- }
-
- public void longprint(){
- System.printString("Timer Calls Time(s) Performance("+opname+"/s)\n");
- System.printString(name + " " + calls + " " + (long)time + " " + (long)this.perf() + "\n");
- }
-
- public void print(){
- if (opname.equals("")) {
- System.printString(name + " " + (long)time + " (s)\n");
- }
- else {
- if(size == 0) {
- System.printString(name + ":SizeA" + "\t" + (long)time + " (s) \t " + (long)this.perf() + "\t" + " ("+opname+"/s)\n");
- } else if (size == 1) {
- System.printString(name + ":SizeB" + "\t" + (long)time + " (s) \t " + (long)this.perf() + "\t" + " ("+opname+"/s)\n");
- } else if (size == 2) {
- System.printString(name + ":SizeC" + "\t" + (long)time + " (s) \t " + (long)this.perf() + "\t" + " ("+opname+"/s)\n");
- } else{
- System.printString(name + "\t" + (long)time + " (s) \t " + (long)this.perf() + "\t" + " ("+opname+"/s)\n");
- }
- }
- }
-
-
- public void printperf(){
-
- String name;
- name = this.name;
-
- // pad name to 40 characters
- while ( name.length() < 40 ) name = name + " ";
-
- System.printString(name + "\t" + (long)this.perf() + "\t"
- + " ("+opname+"/s)\n");
- }
-
-}
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 2001. *
- * All rights reserved. *
- * *
- **************************************************************************/
-
-// This implements a simple tournament-based barrier, using entirely its
-// own synchronisation. At present Yield() is called to stop busy-waiting
-// processes hogging the processor(s)!
-
-public class TournamentBarrier {
- // Array of flags indicating whether the given process and all those
- // for which it is responsible have finished. The "sense" of this
- // array alternates with each barrier, to prevent having to
- // reinitialise.
- boolean[] IsDone;
- public int maxBusyIter;
- int numThreads;
-
- public TournamentBarrier(int n) {
- numThreads = n;
- maxBusyIter = 1;
- // Superclass constructor should record the number of threads
- // and thread manager.
- //super(n);
-
- // Initialise the IsDone array. The choice of initial value is
- // arbitrary, but must be consistent!
- IsDone = new boolean[numThreads];
- for(int i = 0; i < n; i++) {
- IsDone[i] = false;
- }
- }
-
- // Uses the manager's debug function, so this can only be used after
- // construction!
- public void debug(String s) {
- //System.err.println("Debug message" + s);
- }
-
- /*
- public void setMaxBusyIter(int b) {
- maxBusyIter = b;
- }
- */
-
- public void DoBarrier(int myid) {
- int b;
- //debug("Thread " + myid + " checking in");
-
- int roundmask = 3;
- boolean donevalue = !IsDone[myid];
-
- while(((myid & roundmask) == 0) && (roundmask<(numThreads<<2))) {
- int spacing = (roundmask+1) >> 2;
- for(int i=1; i<=3 && myid+i*spacing < numThreads; i++) {
- //debug("Thread " + myid + " waiting for thread " + (myid+i*spacing));
- b = maxBusyIter;
- while(IsDone[myid+i*spacing] != donevalue) {
- b--;
- if(b==0) {
- //Thread.yield();
- b = maxBusyIter;
- }
- }
- }
- roundmask = (roundmask << 2) + 3;
- }
- //debug("Thread " + myid + " reporting done");
- IsDone[myid] = donevalue;
- b = maxBusyIter;
- while(IsDone[0] != donevalue) {
- b--;
- if(b==0) {
- //Thread.yield();
- b = maxBusyIter;
- }
- }
- //debug("Thread " + myid + " checking out");
-
- }
-}
+++ /dev/null
-
-#SRC = JGFMolDynBenchSizeA
-#default:
-# javac ${SRC}.java
-#run:
-# java ${SRC} 2
-#
-#clean:
-# rm *.class
-
-MAINCLASS=JGFMolDynBenchSizeA
-SRC=${MAINCLASS}.java \
-JGFInstrumentor.java \
-JGFTimer.java \
-JGFMolDynBench.java \
-Barrier.java
-#FLAGS=-dsm -prefetch -excprefetch particle.force -excprefetch particle.domove -excprefetch particle.mkekin -excprefetch TournamentBarrier.DoBarrier -excprefetch JGFMolDynBench.JGFvalidate -excprefetch JGFMolDynBench.JGFapplication -optimize -debug -profile -mainclass ${MAINCLASS} -o ${MAINCLASS} -trueprob 0.8
-FLAGS2= -thread -mainclass ${MAINCLASS} -o ${MAINCLASS}NP
-
-default:
- ../../../../buildscript ${FLAGS2} ${SRC}
-# ../../../../buildscript ${FLAGS} ${SRC}
-
-clean:
- rm -rf tmpbuilddirectory/
- rm *.bin
--- /dev/null
+public class Barrier { // Reusable spin-wait barrier: enterBarrier() returns once numthreads callers have entered.
+ int numthreads; // number of callers that must enter before the barrier opens
+ int entercount; // callers currently counted inside the barrier
+ boolean cleared; // true while the barrier is open and waiting callers are leaving
+ // Creates a barrier for n callers; entercount is left at its default value.
+ public Barrier(int n) {
+ numthreads=n;
+ cleared = false;
+ }
+ // No-argument constructor: assigns nothing, so every field keeps its default value.
+ public Barrier() {
+
+ }
+ // Puts the barrier back into its closed, empty state; numthreads is not touched.
+ public void reset() {
+ cleared = false;
+ entercount = 0;
+ }
+ /*
+  * Blocks the caller, by spinning, until b.numthreads callers have entered.
+  * Arrival: wait until the barrier is not in its "cleared" (draining) state, then
+  * count this caller in. The caller that brings entercount up to numthreads opens
+  * the barrier (sets cleared, unless numthreads is 1), counts itself out and returns.
+  * Departure: every other caller spins until cleared is set, counts itself out, and
+  * the one that brings entercount back to 0 closes the barrier again for reuse.
+  * NOTE(review): no locking or atomic section is visible around the entercount and
+  * cleared updates - confirm the target runtime makes these accesses safe when
+  * several threads enter at once.
+  */
+ public static void enterBarrier(Barrier b) {
+ int tmp;
+ boolean retry=true;
+ // Arrival phase: retry for as long as a previous round is still draining.
+ do {
+ if (!b.cleared) {
+ b.entercount++;
+ tmp = b.entercount;
+ if (tmp==b.numthreads) { // this caller is the last one to arrive
+ if(b.numthreads > 1) // with a single caller nobody is waiting, so the barrier never opens
+ b.cleared=true;
+ b.entercount--;
+ return;
+ }
+ retry=false;
+ }
+ } while(retry);
+ // Departure phase: spin until the last arrival has opened the barrier.
+ while(true) {
+ if (b.cleared) {
+ b.entercount--;
+ int count = b.entercount;
+ if (count==0) // last caller out closes the barrier for the next round
+ b.cleared=false;
+ return;
+ }
+ }
+ }
+}
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 1999. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+// Registry of named JGFTimer objects plus a free-form data table. All
+// operations are static and act on the HashMap passed in; every failure is
+// reported with System.printString rather than by throwing.
+public class JGFInstrumentor{
+
+  // Timers keyed by name; every value stored here is a JGFTimer.
+  protected HashMap timers;
+  // Objects stored by name through storeData().
+  protected HashMap data;
+
+  // Creates an instrumentor with empty timer and data tables.
+  public JGFInstrumentor() {
+    timers = new HashMap();
+    data = new HashMap();
+  }
+
+  // Registers a timer called name; warns and leaves the table unchanged if
+  // that name is already present.
+  public static void addTimer (String name, HashMap timers){
+
+    if (timers.containsKey(name)) {
+      System.printString("JGFInstrumentor.addTimer: warning - timer " + name +
+          " already exists\n");
+    }
+    else {
+      timers.put(name, new JGFTimer(name));
+    }
+  }
+
+  // As addTimer(name, timers), also recording the operation name opname.
+  public static void addTimer (String name, String opname, HashMap timers){
+
+    if (timers.containsKey(name)) {
+      System.printString("JGFInstrumentor.addTimer: warning - timer " + name +
+          " already exists\n");
+    }
+    else {
+      timers.put(name, new JGFTimer(name,opname));
+    }
+
+  }
+
+  // As addTimer(name, opname, timers), also recording the problem size index.
+  public static void addTimer (String name, String opname, int size, HashMap timers){
+
+    if (timers.containsKey(name)) {
+      System.printString("JGFInstrumentor.addTimer: warning - timer " + name +
+          " already exists\n");
+    }
+    else {
+      timers.put(name, new JGFTimer(name,opname,size));
+    }
+
+  }
+
+  // Starts the named timer, or reports that it does not exist.
+  public static void startTimer(String name, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).start();
+    }
+    else {
+      System.printString("JGFInstrumentor.startTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+
+  }
+
+  // Stops the named timer, or reports that it does not exist.
+  public static void stopTimer(String name, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).stop();
+    }
+    else {
+      System.printString("JGFInstrumentor.stopTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+  }
+
+  // Adds count operations to the named timer, or reports that it does not exist.
+  public static void addOpsToTimer(String name, double count, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).addops(count);
+    }
+    else {
+      System.printString("JGFInstrumentor.addOpsToTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+  }
+
+  // Adds added_time to the named timer, or reports that it does not exist.
+  public static void addTimeToTimer(String name, double added_time, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).addtime(added_time);
+    }
+    else {
+      System.printString("JGFInstrumentor.addTimeToTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+  }
+
+  // Returns the time field of the named timer; reports the problem and
+  // returns 0.0 when the timer does not exist.
+  public static double readTimer(String name, HashMap timers){
+    double time;
+    if (timers.containsKey(name)) {
+      time = ((JGFTimer) timers.get(name)).time;
+    }
+    else {
+      System.printString("JGFInstrumentor.readTimer: failed - timer " + name +
+          " does not exist\n");
+      time = 0.0;
+    }
+    return time;
+  }
+
+  // Resets the named timer, or reports that it does not exist.
+  public static void resetTimer(String name, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).reset();
+    }
+    else {
+      System.printString("JGFInstrumentor.resetTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+  }
+
+  // Prints the named timer via JGFTimer.print(), or reports that it does not exist.
+  public static void printTimer(String name, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).print();
+    }
+    else {
+      System.printString("JGFInstrumentor.printTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+  }
+
+  // Prints the named timer via JGFTimer.printperf(), or reports that it does
+  // not exist. The failure message names this method; it used to claim to
+  // come from printTimer.
+  public static void printperfTimer(String name, HashMap timers){
+    if (timers.containsKey(name)) {
+      ((JGFTimer) timers.get(name)).printperf();
+    }
+    else {
+      System.printString("JGFInstrumentor.printperfTimer: failed - timer " + name +
+          " does not exist\n");
+    }
+  }
+
+  // Stores obj under name, replacing whatever was stored there before.
+  public static void storeData(String name, Object obj, HashMap data){
+    data.put(name,obj);
+  }
+
+  // Looks name up in data.
+  // NOTE: the result is assigned to the parameter obj, which only rebinds the
+  // local variable, so the caller never receives the value. The void
+  // signature is kept for existing callers; use data.get(name) directly to
+  // obtain the stored object.
+  public static void retrieveData(String name, Object obj, HashMap data){
+    obj = data.get(name);
+  }
+
+  // Prints the suite banner for the given section and size index, followed
+  // by the thread count. Section/size combinations not listed below print an
+  // empty banner line.
+  public static void printHeader(int section, int size,int nthreads) {
+
+    String header, base;
+
+    header = "";
+    base = "Java Grande Forum Thread Benchmark Suite - Version 1.0 - Section ";
+
+    if (section == 1)
+    {
+      header = base + "1";
+    }
+    else if (section == 2)
+    {
+      if (size == 0)
+        header = base + "2 - Size A";
+      else if (size == 1)
+        header = base + "2 - Size B";
+      else if (size == 2)
+        header = base + "2 - Size C";
+    }
+    else if (section == 3)
+    {
+      if (size == 0)
+        header = base + "3 - Size A";
+      else if (size == 1)
+        header = base + "3 - Size B";
+    }
+
+    System.printString(header + "\n");
+
+    // Singular vs. plural wording only.
+    if (nthreads == 1) {
+      System.printString("Executing on " + nthreads + " thread\n");
+    }
+    else {
+      System.printString("Executing on " + nthreads + " threads\n");
+    }
+
+    System.printString(" \n");
+  }
+}
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 2001. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+// Holds the parameters and per-thread result arrays of the molecular-dynamics
+// benchmark, and launches the mdRunner threads that do the work.
+public class JGFMolDynBench {
+  // Constants assigned in JGFinitialise().
+  public int ITERS;
+  public double LENGTH;
+  public double m;
+  public double mu;
+  public double kb;
+  public double TSIM;
+  public double deltat;
+
+  // Number of particles: 4 * mm^3, assigned in JGFinitialise().
+  public int PARTSIZE;
+
+  // One slot per thread, allocated in JGFapplication() and written by the runners.
+  public double[] epot;
+  public double[] vir;
+  public double[] ek;
+
+  // size selects an entry of datasizes; mm is the selected entry.
+  int size,mm;
+  int[] datasizes;
+
+  // Running total of pair interactions and the per-thread counts feeding it.
+  public int interactions;
+  public int[] interacts;
+
+  public int nthreads;
+  // Never assigned anywhere in this class.
+  public JGFInstrumentor instr;
+
+  // Records the number of worker threads; nothing else is set up here.
+  public JGFMolDynBench(int nthreads) {
+    this.nthreads=nthreads;
+  }
+
+  // Selects the problem size index (0 or 1, the indices of datasizes).
+  public void JGFsetsize(int size){
+    this.size = size;
+  }
+
+  // Resets the interaction counter and derives mm and PARTSIZE from the
+  // selected size, then assigns the remaining constants. Call after
+  // JGFsetsize() and before JGFapplication().
+  public void JGFinitialise(){
+    interactions = 0;
+    datasizes = new int[2];
+    datasizes[0] = 8;
+    datasizes[1] = 13;
+
+    mm = datasizes[size];
+    PARTSIZE = mm*mm*mm*4;
+    ITERS = 100;
+    LENGTH = 50e-10;
+    m = 4.0026;
+    mu = 1.66056e-27;
+    kb = 1.38066e-23;
+    TSIM = 50;
+    deltat = 5e-16;
+  }
+
+  // Runs the simulation on mold.nthreads runners and returns once all of
+  // them have finished. Every runner receives the same two force arrays and
+  // the same Barrier.
+  public static void JGFapplication(JGFMolDynBench mold) {
+    // Per-thread result slots.
+    mold.epot = new double [mold.nthreads];
+    mold.vir = new double [mold.nthreads];
+    mold.ek = new double [mold.nthreads];
+    mold.interacts = new int [mold.nthreads];
+
+    int partsize, numthreads;
+    partsize = mold.PARTSIZE;
+    numthreads = mold.nthreads;
+
+    // sh_force holds the summed force per axis and particle; sh_force2 holds
+    // one such slice per thread.
+    double sh_force [][];
+    double sh_force2 [][][];
+    sh_force = new double[3][partsize];
+    sh_force2 = new double[3][numthreads][partsize];
+
+    mdRunner[] thobjects;
+    Barrier br;
+    thobjects = new mdRunner[numthreads];
+    br= new Barrier(numthreads);
+
+    mdRunner tmp;
+
+    // Start runners 1..numthreads-1 first, runner 0 last.
+    for(int i=1;i<numthreads;i++) {
+      thobjects[i] = new mdRunner(i,mold.mm,sh_force,sh_force2,br,mold.nthreads,mold);
+      tmp = thobjects[i];
+      tmp.start();
+    }
+
+    thobjects[0] = new mdRunner(0,mold.mm,sh_force,sh_force2,br,mold.nthreads,mold);
+    tmp = thobjects[0];
+    tmp.start();
+    tmp.join();
+
+    // Wait for the remaining runners.
+    for(int i=1;i<numthreads;i++) {
+      tmp = thobjects[i];
+      tmp.join();
+    }
+  }
+
+  // Compares ek[0] with the reference value for the selected size and
+  // reports a deviation above 1.0e-10. The report lines were previously
+  // commented out, which made a failed validation silent.
+  public void JGFvalidate(){
+    double[] refval = new double[2];
+    refval[0] = 1731.4306625334357;
+    refval[1] = 7397.392307839352;
+    double dev = Math.fabs(ek[0] - refval[size]);
+    if (dev > 1.0e-10 ){
+      System.printString("Validation failed\n");
+      System.printString("Kinetic Energy = " + (long)ek[0] + " " + (long)dev + " " + size + "\n");
+    }
+  }
+}
+
+class mdRunner extends Thread {
+ // One worker thread of the molecular-dynamics run; the whole simulation loop lives in run().
+ double count;
+ int id,i,j,k,lg,mdsize,mm; // i, j, k and lg are used as loop counters inside run()
+ double l,rcoff,rcoffs,side,sideh,hsq,hsq2,vel,velt;
+ double a,r,sum,tscale,sc,ekin,ts,sp;
+ double den;
+ double tref;
+ double h;
+ double vaver,vaverh,rand;
+ double etot,temp,pres,rp;
+ double u1,u2,v1,v2,s, xx, yy, zz; // xx, yy, zz are never assigned here, only passed by value to particle.force()
+ double xvelocity, yvelocity, zvelocity;
+ // Force arrays handed in through the constructor; see run() for how they are used.
+ double [][] sh_force;
+ double [][][] sh_force2;
+
+ int ijk,npartm,iseed,tint;
+ int irep;
+ int istop;
+ int iprint;
+
+ Barrier br;
+ random randnum;
+ JGFMolDynBench mymd;
+ int nthreads;
+ // This runner's own particle array, allocated in run().
+ particle[] one;
+ // Stores the arguments and presets count, den, tref, h, irep, istop and iprint to fixed values.
+ public mdRunner(int id, int mm, double [][] sh_force, double [][][] sh_force2,Barrier br,
+ int nthreads, JGFMolDynBench mymd) {
+ this.id=id;
+ this.mm=mm;
+ this.sh_force=sh_force;
+ this.sh_force2=sh_force2;
+ this.br=br;
+ this.nthreads = nthreads;
+ this.mymd = mymd;
+ count = 0.0;
+ den = 0.83134;
+ tref = 0.722;
+ h = 0.064;
+ irep = 10;
+ istop = 19;
+ iprint = 10;
+ }
+ /*
+  * Thread body. Builds this runner's own array of mdsize particles, gives them
+  * initial velocities and rescales those, then performs movemx time steps.
+  * Within a step every runner moves and re-scores all of its own particles, but
+  * the pair-force loop is split between runners (particle indices id,
+  * id+nthreads, ...) and runner 0 alone folds the per-runner sh_force2 slices
+  * into sh_force. Calls to Barrier.enterBarrier(tmpbr) separate the phases.
+  * NOTE(review): the shared arrays are read and written without any locking
+  * visible here; correctness appears to rest on the barrier placement alone -
+  * confirm before reordering any statement.
+  */
+ public void run() {
+
+ //System.printString("Start run method\n");
+
+ /* Parameter determination */
+
+ int tmpmdsize;
+ double tmpden;
+ int movemx=50; // number of time steps performed by the loop below
+ Barrier tmpbr;
+
+ tmpbr=br;
+ mdsize = mymd.PARTSIZE;
+ one = new particle[mdsize];
+ l = mymd.LENGTH;
+ tmpmdsize = mdsize;
+ tmpden = den;
+ side = Math.pow((tmpmdsize/tmpden),0.3333333); // approximate cube root: the exponent literal is 0.3333333
+ rcoff = mm/4.0;
+
+ a = side/mm;
+ sideh = side*0.5;
+ hsq = h*h;
+ hsq2 = hsq*0.5;
+ npartm = tmpmdsize - 1;
+ rcoffs = rcoff * rcoff;
+ tscale = 16.0 / (1.0 * tmpmdsize - 1.0);
+ vaver = 1.13 * Math.sqrt(tref / 24.0);
+ vaverh = vaver * h;
+
+ /* Particle Generation: each of the two loop nests below creates 2*mm*mm*mm
+  * particles with zero velocity, filling one[] in order through ijk. */
+
+ xvelocity = 0.0;
+ yvelocity = 0.0;
+ zvelocity = 0.0;
+ ijk = 0;
+
+ for (lg=0; lg<=1; lg++) {
+ for (i=0; i<mm; i++) {
+ for (j=0; j<mm; j++) {
+ for (k=0; k<mm; k++) {
+ one[ijk] = new particle((i*a+lg*a*0.5),(j*a+lg*a*0.5),(k*a),
+ xvelocity,yvelocity,zvelocity,sh_force,sh_force2,id,this);
+ ijk = ijk + 1;
+ }
+ }
+ }
+ }
+
+ for (lg=1; lg<=2; lg++) {
+ for (i=0; i<mm; i++) {
+ for (j=0; j<mm; j++) {
+ for (k=0; k<mm; k++) {
+ one[ijk] = new particle((i*a+(2-lg)*a*0.5),(j*a+(lg-1)*a*0.5),
+ (k*a+a*0.5),xvelocity,yvelocity,zvelocity,sh_force,sh_force2,id,this);
+ ijk = ijk + 1;
+ }
+ }
+ }
+ }
+
+ /* Initialise velocities: each seed() call returns a factor r and leaves a pair
+  * in randnum.v1 / randnum.v2; particles i and i+1 take one value each, so these
+  * loops rely on tmpmdsize being even. */
+
+ iseed = 0;
+ v1 = 0.0;
+ v2 = 0.0;
+ randnum = new random(iseed,v1,v2);
+
+ for (i=0; i<tmpmdsize; i+=2) {
+ r = randnum.seed();
+ one[i].xvelocity = r*randnum.v1;
+ one[i+1].xvelocity = r*randnum.v2;
+ }
+
+ for (i=0; i<tmpmdsize; i+=2) {
+ r = randnum.seed();
+ one[i].yvelocity = r*randnum.v1;
+ one[i+1].yvelocity = r*randnum.v2;
+ }
+
+ for (i=0; i<tmpmdsize; i+=2) {
+ r = randnum.seed();
+ one[i].zvelocity = r*randnum.v1;
+ one[i+1].zvelocity = r*randnum.v2;
+ }
+
+
+ /* velocity scaling: per axis, subtract the mean component from every particle
+  * and add the squared result to ekin; afterwards multiply all velocities by sc. */
+
+ ekin = 0.0;
+ sp = 0.0;
+
+ for(i=0;i<tmpmdsize;i++) {
+ sp = sp + one[i].xvelocity;
+ }
+ sp = sp / tmpmdsize;
+
+ for(i=0;i<tmpmdsize;i++) {
+ one[i].xvelocity = one[i].xvelocity - sp;
+ ekin = ekin + one[i].xvelocity*one[i].xvelocity;
+ }
+
+ sp = 0.0;
+ for(i=0;i<tmpmdsize;i++) {
+ sp = sp + one[i].yvelocity;
+ }
+ sp = sp / tmpmdsize;
+
+ for(i=0;i<tmpmdsize;i++) {
+ one[i].yvelocity = one[i].yvelocity - sp;
+ ekin = ekin + one[i].yvelocity*one[i].yvelocity;
+ }
+
+
+ sp = 0.0;
+ for(i=0;i<tmpmdsize;i++) {
+ sp = sp + one[i].zvelocity;
+ }
+ sp = sp / tmpmdsize;
+
+ for(i=0;i<tmpmdsize;i++) {
+ one[i].zvelocity = one[i].zvelocity - sp;
+ ekin = ekin + one[i].zvelocity*one[i].zvelocity;
+ }
+
+ ts = tscale * ekin;
+ sc = h * Math.sqrt(tref/ts);
+
+
+ for(i=0;i<tmpmdsize;i++) {
+
+ one[i].xvelocity = one[i].xvelocity * sc;
+ one[i].yvelocity = one[i].yvelocity * sc;
+ one[i].zvelocity = one[i].zvelocity * sc;
+
+ }
+
+ /* Synchronise threads before MD simulation (the timer start below is commented out) */
+
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //int myid;
+ //atomic {
+ // myid = id;
+ //}
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+ //if (id == 0) JGFInstrumentor.startTimer("Section3:MolDyn:Run", instr.timers);
+ //Barrier.enterBarrier(tmpbr);
+
+ /* MD simulation */
+
+ for (int move=0;move<movemx;move++) {
+ /* move the particles and update velocities (each runner, on its own particle array) */
+
+ for (i=0;i<tmpmdsize;i++) {
+ one[i].domove(side,i);
+ }
+
+ /* Barrier */
+ //System.printString("Barrier #2\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+
+ if(id==0) { // runner 0 zeroes the summed force array for this step
+ for(j=0;j<3;j++) {
+ for (i=0;i<tmpmdsize;i++) {
+ sh_force[j][i] = 0.0;
+ }
+ }
+ }
+ // Every runner clears its own accumulator slots.
+ mymd.epot[id] = 0.0;
+ mymd.vir[id] = 0.0;
+ mymd.interacts[id] = 0;
+
+
+ /* Barrier */
+ //System.printString("Barrier #3\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+
+ /* compute forces: particles are dealt out cyclically, this runner handling
+  * i = id, id+nthreads, id+2*nthreads, ... */
+
+ for (i=0+id;i<tmpmdsize;i+=nthreads) {
+ one[i].force(side,rcoff,tmpmdsize,i,xx,yy,zz,mymd);
+ }
+
+ /* Barrier */
+ //System.printString("Barrier #4\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+
+ /* update force arrays: runner 0 adds every runner's sh_force2 slice into
+  * sh_force, then zeroes sh_force2 */
+ if(id == 0) {
+ for(int k=0;k<3;k++) { // this local k hides the field k inside the loop
+ for(i=0;i<tmpmdsize;i++) {
+ for(j=0;j<nthreads;j++) {
+ sh_force[k][i] += sh_force2[k][j][i];
+ }
+ }
+ }
+ }
+
+ if(id == 0) {
+ for(int k=0;k<3;k++) {
+ for(i=0;i<tmpmdsize;i++) {
+ for(j=0;j<nthreads;j++) {
+ sh_force2[k][j][i] = 0.0;
+ }
+ }
+ }
+ }
+
+ if(id==0) { // fold epot/vir into slot 0, copy the totals back to every slot, add up the interaction counts
+ for(j=1;j<nthreads;j++) {
+ mymd.epot[0] += mymd.epot[j];
+ mymd.vir[0] += mymd.vir[j];
+ }
+ for(j=1;j<nthreads;j++) {
+ mymd.epot[j] = mymd.epot[0];
+ mymd.vir[j] = mymd.vir[0];
+ }
+ for(j=0;j<nthreads;j++) {
+ mymd.interactions += mymd.interacts[j];
+ }
+ }
+
+ /* Barrier */
+ //System.printString("Barrier #5\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+
+ if(id == 0) { // runner 0 multiplies every summed force entry by hsq2
+ for (j=0;j<3;j++) {
+ for (i=0;i<tmpmdsize;i++) {
+ sh_force[j][i] = sh_force[j][i] * hsq2;
+ }
+ }
+ }
+
+ sum = 0.0;
+
+
+ /* Barrier */
+ //System.printString("Barrier #6\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+
+ /* update velocities from the scaled forces; sum collects the squared speeds returned by mkekin */
+
+ for (i=0;i<tmpmdsize;i++) {
+ sum = sum + one[i].mkekin(hsq2,i);
+ }
+
+ ekin = sum/hsq;
+
+ vel = 0.0;
+ count = 0.0;
+
+ /* average velocity: vel sums the speeds, count tallies those above vaverh */
+
+ for (i=0;i<tmpmdsize;i++) {
+ velt = one[i].velavg(vaverh,h);
+ if(velt > vaverh) { count = count + 1.0; }
+ vel = vel + velt;
+ }
+
+ vel = vel / h;
+
+ /* temperature scale if required: only on every irep-th step, and only while move < istop */
+
+ if((move < istop) && (((move+1) % irep) == 0)) {
+ sc = Math.sqrt(tref / (tscale*ekin));
+ for (i=0;i<tmpmdsize;i++) {
+ one[i].dscal(sc,1);
+ }
+ ekin = tref / tscale;
+ }
+
+ /* sum to get full potential energy and virial (every iprint-th step); apart
+  * from mymd.ek and mymd.epot the results stay in this runner's own fields */
+
+ if(((move+1) % iprint) == 0) {
+ mymd.ek[id] = 24.0*ekin;
+ mymd.epot[id] = 4.0*mymd.epot[id];
+ etot = mymd.ek[id] + mymd.epot[id];
+ temp = tscale * ekin;
+ pres = tmpden * 16.0 * (ekin - mymd.vir[id]) / tmpmdsize;
+ vel = vel / tmpmdsize;
+ rp = (count / tmpmdsize) * 100.0;
+ }
+ //System.printString("Barrier #7\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+ }
+
+ //System.printString("Barrier #8\n");
+ Barrier.enterBarrier(tmpbr);
+ //System.clearPrefetchCache();
+ //TournamentBarrier.enterBarrier(myid, tmpbr);
+ //if (id == 0) JGFInstrumentor.stopTimer("Section3:MolDyn:Run", instr.timers);
+ //System.printString("End run method\n");
+ }
+
+}
+
+
+
+
+// A single particle: position, velocity, and references to the force arrays
+// and the runner that owns it.
+class particle {
+
+  public double xcoord, ycoord, zcoord;
+  public double xvelocity,yvelocity,zvelocity;
+  int part_id;
+  int id;
+  double [][] sh_force;
+  double [][][] sh_force2;
+  mdRunner runner;
+
+  // Stores position, velocity, the two force arrays, the owning runner's id
+  // and the runner itself. The part_id field is not assigned here.
+  public particle(double xcoord, double ycoord, double zcoord, double xvelocity,
+                  double yvelocity,double zvelocity,double [][] sh_force,
+                  double [][][] sh_force2,int id,mdRunner runner) {
+    this.runner = runner;
+    this.id = id;
+    this.sh_force2 = sh_force2;
+    this.sh_force = sh_force;
+
+    this.xvelocity = xvelocity;
+    this.yvelocity = yvelocity;
+    this.zvelocity = zvelocity;
+
+    this.xcoord = xcoord;
+    this.ycoord = ycoord;
+    this.zcoord = zcoord;
+  }
+
+  // Advances the position by the velocity plus this particle's entry of
+  // sh_force, wraps each coordinate that left [0, side] by one box length,
+  // and finally adds the same force entry to the velocity.
+  public void domove(double side,int part_id) {
+    double fx = sh_force[0][part_id];
+    double fy = sh_force[1][part_id];
+    double fz = sh_force[2][part_id];
+
+    xcoord = xcoord + xvelocity + fx;
+    if (xcoord < 0) { xcoord += side; }
+    if (xcoord > side) { xcoord -= side; }
+
+    ycoord = ycoord + yvelocity + fy;
+    if (ycoord < 0) { ycoord += side; }
+    if (ycoord > side) { ycoord -= side; }
+
+    zcoord = zcoord + zvelocity + fz;
+    if (zcoord < 0) { zcoord += side; }
+    if (zcoord > side) { zcoord -= side; }
+
+    xvelocity += fx;
+    yvelocity += fy;
+    zvelocity += fz;
+  }
+
+  // Accumulates the pair terms between this particle (index x) and every
+  // particle with a higher index in runner.one. Pairs whose squared distance
+  // is within rcoff*rcoff contribute to mymd.epot[id], mymd.vir[id] and
+  // mymd.interacts[id]; the partner's slot of sh_force2[.][id] is decreased
+  // by the pair term and the total is added to slot x at the end.
+  // The incoming xx, yy and zz values are not read.
+  public void force(double side, double rcoff,int mdsize,int x, double xx, double yy, double zz, JGFMolDynBench mymd) {
+    double halfSide = 0.5*side;
+    double cutoffSq = rcoff*rcoff;
+
+    double accX = 0.0;
+    double accY = 0.0;
+    double accZ = 0.0;
+
+    for (int j = x+1; j < mdsize; j++) {
+      particle other = runner.one[j];
+
+      // Separation per axis, shifted by one box length when it exceeds half a box.
+      double dx = this.xcoord - other.xcoord;
+      double dy = this.ycoord - other.ycoord;
+      double dz = this.zcoord - other.zcoord;
+
+      if (dx < (-halfSide)) { dx += side; }
+      if (dx > halfSide) { dx -= side; }
+      if (dy < (-halfSide)) { dy += side; }
+      if (dy > halfSide) { dy -= side; }
+      if (dz < (-halfSide)) { dz += side; }
+      if (dz > halfSide) { dz -= side; }
+
+      double distSq = dx*dx + dy*dy + dz*dz;
+
+      if (distSq <= cutoffSq) {
+        // Inverse powers of the squared distance, built by repeated multiplication.
+        double inv = 1.0/distSq;
+        double inv2 = inv*inv;
+        double inv3 = inv2*inv;
+        double inv4 = inv2*inv2;
+        double inv6 = inv2*inv4;
+        double inv7 = inv6*inv;
+
+        mymd.epot[id] += (inv6 - inv3);
+        double factor = inv7 - 0.5*inv4;
+        mymd.vir[id] -= distSq*factor;
+
+        double pairX = dx * factor;
+        double pairY = dy * factor;
+        double pairZ = dz * factor;
+
+        accX += pairX;
+        accY += pairY;
+        accZ += pairZ;
+
+        sh_force2[0][id][j] -= pairX;
+        sh_force2[1][id][j] -= pairY;
+        sh_force2[2][id][j] -= pairZ;
+
+        mymd.interacts[id]++;
+      }
+    }
+
+    sh_force2[0][id][x] += accX;
+    sh_force2[1][id][x] += accY;
+    sh_force2[2][id][x] += accZ;
+  }
+
+  // Adds this particle's entry of sh_force to the velocity and returns the
+  // squared speed afterwards. hsq2 is not used.
+  public double mkekin(double hsq2,int part_id) {
+    xvelocity += sh_force[0][part_id];
+    yvelocity += sh_force[1][part_id];
+    zvelocity += sh_force[2][part_id];
+
+    return (xvelocity*xvelocity)+(yvelocity*yvelocity)+(zvelocity*zvelocity);
+  }
+
+  // Returns the current speed (length of the velocity vector). Neither
+  // argument is used.
+  public double velavg(double vaverh,double h) {
+    return Math.sqrt(xvelocity*xvelocity + yvelocity*yvelocity +
+                     zvelocity*zvelocity);
+  }
+
+  // Multiplies every velocity component by sc. incx is not used.
+  public void dscal(double sc,int incx) {
+    xvelocity = sc * xvelocity;
+    yvelocity = sc * yvelocity;
+    zvelocity = sc * zvelocity;
+  }
+
+}
+
+// Small pseudo-random generator: update() steps an integer seed and returns
+// it scaled to a double; seed() draws pairs until one falls strictly inside
+// the unit circle.
+class random {
+
+  public int iseed;
+  public double v1,v2;
+
+  // Stores the starting seed and the initial v1/v2 values.
+  public random(int iseed,double v1,double v2) {
+    this.v2 = v2;
+    this.v1 = v1;
+    this.iseed = iseed;
+  }
+
+  // Advances iseed and returns it multiplied by a fixed scale factor. The
+  // seed is split into a low part (below 32768) and a high part so the
+  // products are formed piecewise; a seed of zero or less is first replaced by 1.
+  public double update() {
+    int multiplier = 16807;
+    int modulus = 2147483647;
+    double scale = 4.656612875e-10;
+
+    if (iseed <= 0) {
+      iseed = 1;
+    }
+
+    int low = iseed % 32768;
+    int high = (iseed - low)/32768;
+
+    int lowProduct = low * multiplier;
+    low = lowProduct % 32768;
+    high = (high*multiplier + (lowProduct - low)/32768) % (65536);
+
+    iseed = (high*32768 + low) % modulus;
+
+    return scale * iseed;
+  }
+
+  // Repeatedly sets v1 and v2 to 2*update()-1 until v1*v1 + v2*v2 is below
+  // 1.0, then returns sqrt(-2*log(s)/s) for that sum s. v1 and v2 keep the
+  // accepted pair for the caller to read.
+  public double seed() {
+    double radiusSq;
+
+    do {
+      v1 = 2.0 * update() - 1.0;
+      v2 = 2.0 * update() - 1.0;
+      radiusSq = v1*v1 + v2*v2;
+    } while (radiusSq >= 1.0);
+
+    return Math.sqrt(-2.0*Math.log(radiusSq)/radiusSq);
+  }
+}
+
+
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 2001. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+/**
+ * Command-line entry point that runs the MolDyn benchmark with size index 0
+ * and prints the "Section3:MolDyn:Run" and "Section3:MolDyn:Total" timers.
+ */
+public class JGFMolDynBenchSizeA {
+
+  /**
+   * @param argv optional; argv[0] is parsed as the number of threads.
+   *             With no argument a notice is printed and 1 is used.
+   */
+  public static void main(String argv[]){
+    int nthreads;
+    if(argv.length != 0 ) {
+      nthreads = Integer.parseInt(argv[0]);
+    } else {
+      System.printString("The no of threads has not been specified, defaulting to 1\n");
+      System.printString(" " + "\n");
+      nthreads = 1;
+    }
+
+    JGFInstrumentor instr = new JGFInstrumentor();
+    JGFInstrumentor.printHeader(3,0,nthreads);
+
+    JGFMolDynBench mold = new JGFMolDynBench(nthreads);
+    int size = 0;
+    JGFInstrumentor.addTimer("Section3:MolDyn:Total", "Solutions",size, instr.timers);
+    JGFInstrumentor.addTimer("Section3:MolDyn:Run", "Interactions",size, instr.timers);
+
+    mold.JGFsetsize(size);
+
+    // The Total timer spans initialisation, the run and the validation below.
+    JGFInstrumentor.startTimer("Section3:MolDyn:Total", instr.timers);
+
+    mold.JGFinitialise();
+    JGFMolDynBench.JGFapplication(mold);
+
+    /* Validate data: compare ek[0] with the reference value for this size */
+    double[] refval = new double[2];
+    refval[0] = 1731.4306625334357;
+    refval[1] = 7397.392307839352;
+    double dval = mold.ek[0];
+    double dev = Math.fabs(dval - refval[size]);
+    if (dev > 1.0e-10 ){
+      System.printString("Validation failed\n");
+      // Values are truncated to long before being concatenated, so a small
+      // deviation prints as 0.
+      System.printString("Kinetic Energy = " + (long)dval + " " + (long)dev + " " + size + "\n");
+    }
+    System.printString("End of JGFvalidate\n");
+
+    JGFInstrumentor.stopTimer("Section3:MolDyn:Total", instr.timers);
+
+    double interactions = mold.interactions;
+
+    JGFInstrumentor.addOpsToTimer("Section3:MolDyn:Run", (double) interactions, instr.timers);
+    JGFInstrumentor.addOpsToTimer("Section3:MolDyn:Total", 1, instr.timers);
+
+    JGFInstrumentor.printTimer("Section3:MolDyn:Run", instr.timers);
+    JGFInstrumentor.printTimer("Section3:MolDyn:Total", instr.timers);
+  }
+}
+
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 1999. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+
+/**
+ * Named stopwatch that accumulates elapsed seconds and an operation count,
+ * and prints the time together with operations per second.
+ */
+public class JGFTimer {
+
+  public String name;      // identifier printed in every report line
+  public String opname;    // label for the "(<opname>/s)" column; "" means none
+  public double time;      // accumulated elapsed time in seconds
+  public double opcount;   // accumulated operation count
+  public long calls;       // number of completed start()/stop() intervals
+  public int size;         // 0, 1, 2 print ":SizeA", ":SizeB", ":SizeC"; other values print no suffix
+
+  private long start_time; // System.currentTimeMillis() captured by start()
+  private boolean on;      // true between start() and stop()
+
+  /** Creates a timer with an operation label and no size suffix (size = -1). */
+  public JGFTimer(String name, String opname){
+    this.size = -1;
+    this.name = name;
+    this.opname = opname;
+    reset();
+  }
+
+  /** Creates a timer with an operation label and an explicit size index. */
+  public JGFTimer(String name, String opname, int size){
+    this.name = name;
+    this.opname = opname;
+    this.size = size;
+    reset();
+  }
+
+  /** Creates a timer with an empty operation label and no size suffix. */
+  public JGFTimer(String name){
+    // Same "no size" marker as the (name, opname) constructor, instead of
+    // the field default 0, which print() treats as size A.
+    this.size = -1;
+    this.name = name;
+    this.opname = "";
+    reset();
+  }
+
+  /** Starts an interval; warns (and restarts the interval) if already running. */
+  public void start(){
+    if (on) System.printString("Warning timer " + name + " was already turned on\n");
+    on = true;
+    start_time = System.currentTimeMillis();
+  }
+
+  /**
+   * Ends the current interval and adds its length in seconds to time.
+   * When the timer is not running only a warning is printed: there is no
+   * valid start_time to measure from, so nothing is accumulated.
+   */
+  public void stop(){
+    if (!on) {
+      System.printString("Warning timer " + name + " wasn't turned on\n");
+      return;
+    }
+    time += (double) (System.currentTimeMillis()-start_time) / 1000.;
+    calls++;
+    on = false;
+  }
+
+  /** Adds count to the accumulated operation count. */
+  public void addops(double count){
+    opcount += count;
+  }
+
+  /** Adds added_time (seconds) to the accumulated time. */
+  public void addtime(double added_time){
+    time += added_time;
+  }
+
+  /** Clears time, calls and opcount and marks the timer as stopped. */
+  public void reset(){
+    time = 0.0;
+    calls = 0;
+    opcount = 0;
+    on = false;
+  }
+
+  /** Returns opcount / time; there is no guard for time == 0. */
+  public double perf(){
+    return opcount / time;
+  }
+
+  /** Prints a header line followed by name, calls, time and performance. */
+  public void longprint(){
+    System.printString("Timer Calls Time(s) Performance("+opname+"/s)\n");
+    System.printString(name + " " + calls + " " + (long)time + " " + (long)this.perf() + "\n");
+  }
+
+  /**
+   * Prints one report line. With an empty opname only name and time are
+   * printed; otherwise the name (with a size suffix for size 0, 1 or 2),
+   * the time and the performance are printed. Time and performance are
+   * truncated to long.
+   */
+  public void print(){
+    if (opname.equals("")) {
+      System.printString(name + " " + (long)time + " (s)\n");
+      return;
+    }
+
+    String label = name;
+    if (size == 0) {
+      label = name + ":SizeA";
+    } else if (size == 1) {
+      label = name + ":SizeB";
+    } else if (size == 2) {
+      label = name + ":SizeC";
+    }
+    System.printString(label + "\t" + (long)time + " (s) \t " + (long)this.perf() + "\t" + " ("+opname+"/s)\n");
+  }
+
+  /** Prints the name padded to at least 40 characters, then the performance. */
+  public void printperf(){
+
+    String name;
+    name = this.name;
+
+    // pad name to 40 characters
+    while ( name.length() < 40 ) name = name + " ";
+
+    System.printString(name + "\t" + (long)this.perf() + "\t"
+                       + " ("+opname+"/s)\n");
+  }
+
+}
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 2001. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+
+// This implements a simple tournament-based barrier, using entirely its
+// own synchronisation. The Thread.yield() calls are currently commented
+// out, so waiting threads busy-wait (spin) until they are released.
+
+// Barrier for numThreads threads identified by ids 0..numThreads-1.
+// Each thread reports completion through its IsDone flag and then spins
+// until IsDone[0] takes the value expected for the current barrier episode.
+// NOTE(review): IsDone is a plain boolean[] read in spin loops with no
+// synchronisation visible in this class; whether one thread's write becomes
+// visible to the others depends on the target runtime -- confirm.
+public class TournamentBarrier {
+ // Array of flags indicating whether the given process and all those
+ // for which it is responsible have finished. The "sense" of this
+ // array alternates with each barrier, to prevent having to
+ // reinitialise.
+ boolean[] IsDone;
+ // Reload value for the spin counter in DoBarrier; set to 1 by the
+ // constructor.
+ public int maxBusyIter;
+ // Number of participating threads; also the length of IsDone.
+ int numThreads;
+
+ // Creates a barrier for n threads with every flag initially false.
+ public TournamentBarrier(int n) {
+ numThreads = n;
+ maxBusyIter = 1;
+ // Superclass constructor should record the number of threads
+ // and thread manager.
+ //super(n);
+
+ // Initialise the IsDone array. The choice of initial value is
+ // arbitrary, but must be consistent!
+ IsDone = new boolean[numThreads];
+ for(int i = 0; i < n; i++) {
+ IsDone[i] = false;
+ }
+ }
+
+ // Currently a no-op: the only statement in the body is commented out.
+ public void debug(String s) {
+ //System.err.println("Debug message" + s);
+ }
+
+ /*
+ public void setMaxBusyIter(int b) {
+ maxBusyIter = b;
+ }
+ */
+
+ // Blocks the calling thread (id myid) until thread 0 has published the
+ // flag value for this barrier episode. Before publishing its own flag,
+ // a thread whose id has the low bits of roundmask clear first waits for
+ // up to three other threads per round.
+ public void DoBarrier(int myid) {
+ int b;
+ //debug("Thread " + myid + " checking in");
+
+ // The first round looks at the two lowest bits of myid.
+ int roundmask = 3;
+ // Value the flags take in this episode: the opposite of this thread's
+ // current flag.
+ boolean donevalue = !IsDone[myid];
+
+ // Keep gathering while every bit of myid under roundmask is zero and the
+ // mask is still below numThreads<<2; the mask widens by two bits per round.
+ while(((myid & roundmask) == 0) && (roundmask<(numThreads<<2))) {
+ // Distance between the threads waited for in this round: 1, 4, 16, ...
+ int spacing = (roundmask+1) >> 2;
+ // Wait for the threads at myid+spacing, myid+2*spacing and
+ // myid+3*spacing, skipping ids at or beyond numThreads.
+ for(int i=1; i<=3 && myid+i*spacing < numThreads; i++) {
+ //debug("Thread " + myid + " waiting for thread " + (myid+i*spacing));
+ b = maxBusyIter;
+ // Spin until that thread has published this episode's value. With the
+ // yield commented out, the counter b is only decremented and reloaded.
+ while(IsDone[myid+i*spacing] != donevalue) {
+ b--;
+ if(b==0) {
+ //Thread.yield();
+ b = maxBusyIter;
+ }
+ }
+ }
+ roundmask = (roundmask << 2) + 3;
+ }
+ //debug("Thread " + myid + " reporting done");
+ // Publish this thread's flag, then spin until thread 0's flag matches.
+ // For thread 0 itself the loop below exits immediately.
+ IsDone[myid] = donevalue;
+ b = maxBusyIter;
+ while(IsDone[0] != donevalue) {
+ b--;
+ if(b==0) {
+ //Thread.yield();
+ b = maxBusyIter;
+ }
+ }
+ //debug("Thread " + myid + " checking out");
+
+ }
+}
--- /dev/null
+# Builds the JGFMolDynBenchSizeA benchmark with the project buildscript.
+MAINCLASS=JGFMolDynBenchSizeA
+SRC=${MAINCLASS}.java \
+JGFInstrumentor.java \
+JGFTimer.java \
+JGFMolDynBench.java \
+Barrier.java
+
+# Flags passed to buildscript; the output name is ${MAINCLASS}NP.
+FLAGS2= -thread -optimize -mainclass ${MAINCLASS} -o ${MAINCLASS}NP
+
+# default and clean are commands, not files: always run them.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS2} ${SRC}
+
+# -f keeps clean from failing when no .bin file exists yet.
+clean:
+	rm -rf tmpbuilddirectory/
+	rm -f *.bin
default:
../../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPNC ${SRC}
-# ../../../../buildscript ${FLAGS1} -o ${MAINCLASS}NP ${SRC}
- ../../../../buildscript ${FLAGS} -o ${MAINCLASS}P ${SRC}
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}1NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}1NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}1.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}2NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}2NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}2.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}3NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}3NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}3.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}4NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}4NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}4.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}5NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}5NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}5.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}6NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}6NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}6.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}7NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}7NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}7.bin
- cp ${MAINCLASS}NPNC.bin ${MAINCLASS}8NPNC.bin
-# cp ${MAINCLASS}NP.bin ${MAINCLASS}8NP.bin
- cp ${MAINCLASS}P.bin ${MAINCLASS}8.bin
+ ../../../../buildscript ${FLAGS} -o ${MAINCLASS}N ${SRC}
clean:
rm -rf tmpbuilddirectory
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 1999. *
- * All rights reserved. *
- * *
- **************************************************************************/
-import java.util.*;
-public class JGFInstrumentor{
-
- protected HashMap timers;
- protected HashMap data;
-
- public JGFInstrumentor() {
- timers = new HashMap();
- data = new HashMap();
- }
-
- public static void addTimer (String name, HashMap timers){
-
- if (timers.containsKey(name)) {
- System.out.println("JGFInstrumentor.addTimer: warning - timer " + name +
- " already exists");
- }
- else {
- timers.put(name, new JGFTimer(name));
- }
- }
-
- public static void addTimer (String name, String opname, HashMap timers){
-
- if (timers.containsKey(name)) {
- System.out.println("JGFInstrumentor.addTimer: warning - timer " + name +
- " already exists");
- }
- else {
- timers.put(name, new JGFTimer(name,opname));
- }
-
- }
-
- public static void addTimer (String name, String opname, int size, HashMap timers){
-
- if (timers.containsKey(name)) {
- System.out.println("JGFInstrumentor.addTimer: warning - timer " + name +
- " already exists");
- }
- else {
- timers.put(name, new JGFTimer(name,opname,size));
- }
-
- }
-
- public static void startTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).start();
- }
- else {
- System.out.println("JGFInstrumentor.startTimer: failed - timer " + name +
- " does not exist");
- }
-
- }
-
- public static void stopTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).stop();
- }
- else {
- System.out.println("JGFInstrumentor.stopTimer: failed - timer " + name +
- " does not exist");
- }
- }
-
- public static void addOpsToTimer(String name, double count, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).addops(count);
- }
- else {
- System.out.println("JGFInstrumentor.addOpsToTimer: failed - timer " + name +
- " does not exist");
- }
- }
-
- public static void addTimeToTimer(String name, double added_time, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).addtime(added_time);
- }
- else {
- System.out.println("JGFInstrumentor.addTimeToTimer: failed - timer " + name +
- " does not exist");
- }
-
-
-
- }
-
- public static double readTimer(String name, HashMap timers){
- double time;
- if (timers.containsKey(name)) {
- time = ((JGFTimer) timers.get(name)).time;
- }
- else {
- System.out.println("JGFInstrumentor.readTimer: failed - timer " + name +
- " does not exist");
- time = 0.0;
- }
- return time;
- }
-
- public static void resetTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).reset();
- }
- else {
- System.out.println("JGFInstrumentor.resetTimer: failed - timer " + name +
- " does not exist");
- }
- }
-
- public static void printTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).print();
- }
- else {
- System.out.println("JGFInstrumentor.printTimer: failed - timer " + name +
- " does not exist");
- }
- }
-
- public static void printperfTimer(String name, HashMap timers){
- if (timers.containsKey(name)) {
- ((JGFTimer) timers.get(name)).printperf();
- }
- else {
- System.out.println("JGFInstrumentor.printTimer: failed - timer " + name +
- " does not exist");
- }
- }
-
- public static void storeData(String name, Object obj, HashMap data){
- data.put(name,obj);
- }
-
- public static void retrieveData(String name, Object obj, HashMap data){
- obj = data.get(name);
- }
-
- public static void printHeader(int section, int size,int nthreads) {
-
- String header, base;
-
- header = "";
- base = "Java Grande Forum Thread Benchmark Suite - Version 1.0 - Section ";
-
- if (section == 1)
- {
- header = base + "1";
- }
- else if (section == 2)
- {
- if (size == 0)
- header = base + "2 - Size A";
- else if (size == 1)
- header = base + "2 - Size B";
- else if (size == 2)
- header = base + "2 - Size C";
- }
- else if (section == 3)
- {
- if (size == 0)
- header = base + "3 - Size A";
- else if (size == 1)
- header = base + "3 - Size B";
- }
-
- System.out.println(header);
-
- if (nthreads == 1) {
- System.out.println("Executing on " + nthreads + " thread");
- }
- else {
- System.out.println("Executing on " + nthreads + " threads");
- }
-
- System.out.println("");
- }
-}
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 2001. *
- * All rights reserved. *
- * *
- **************************************************************************/
-
-import java.util.Random;
-
-public class JGFSORBench {
-
- int size;
- int[] datasizes;
- int JACOBI_NUM_ITER;
- long RANDOM_SEED;
- public int nthreads;
- Random R;
- public double Gtotal;
- public int cachelinesize;
- public long sync[][];
-
- public JGFInstrumentor instr;
-
- public JGFSORBench(int nthreads, JGFInstrumentor instr){
- this.nthreads = nthreads;
- this.instr = instr;
- datasizes = new int[3];
- datasizes[0] = 1000;
- datasizes[1] = 1500;
- datasizes[2] = 2000;
- JACOBI_NUM_ITER = 100;
- RANDOM_SEED = 10101010;
- R = new Random(RANDOM_SEED);
- Gtotal = 0.0;
- cachelinesize = 128;
- }
-
- public void JGFsetsize(int size){
- this.size = size;
- }
-
- public static void JGFkernel(JGFSORBench sor, JGFInstrumentor instr) {
- int numthreads;
- numthreads = sor.nthreads;
-
- double G[][] = sor.RandomMatrix(sor.datasizes[sor.size], sor.datasizes[sor.size], sor.R);
- int M = G.length;
- int N = G[0].length;
- double omega = 1.25;
- int num_iterations = sor.JACOBI_NUM_ITER;
-
-
- double omega_over_four = omega * 0.25;
- double one_minus_omega = 1.0 - omega;
-
- // update interior points
- //
- int Mm1 = M-1;
- int Nm1 = N-1;
-
- //spawn threads
- int cachelinesize = sor.cachelinesize;
-
- SORRunner thobjects[] = new SORRunner[numthreads];
- sor.sync = sor.init_sync(numthreads, cachelinesize);
-
- JGFInstrumentor.startTimer("Section2:SOR:Kernel", instr.timers);
-
- for(int i=1;i<numthreads;i++) {
- thobjects[i] = new SORRunner(i,omega,G,num_iterations,sor.sync,numthreads);
- thobjects[i].start();
- }
-
- thobjects[0] = new SORRunner(0,omega,G,num_iterations,sor.sync,numthreads);
- thobjects[0].start();
- try {
- thobjects[0].join();
- }
- catch (InterruptedException e) {}
-
-
- for(int i=1;i<numthreads;i++) {
- try {
- thobjects[i].join();
- }
- catch (InterruptedException e) {}
- }
-
- JGFInstrumentor.stopTimer("Section2:SOR:Kernel", instr.timers);
-
- for (int i=1; i<Nm1; i++) {
- for (int j=1; j<Nm1; j++) {
- sor.Gtotal += G[i][j];
- }
- }
-
- }
-
- private long[][] init_sync(int nthreads, int cachelinesize) {
- long sync[][] = new long [nthreads][cachelinesize];
- for (int i = 0; i<nthreads; i++)
- sync[i][0] = 0;
- return sync;
- }
-
- public void JGFvalidate(){
-
- double refval[] = {0.498574406322512,1.1234778980135105,1.9954895063582696};
- double dev = Math.abs(Gtotal - refval[size]);
- if (dev > 1.0e-12 ){
- System.out.println("Validation failed");
- System.out.println("Gtotal = " + Gtotal + " " + dev + " " + size);
- }
- }
-
- /*
- public void JGFtidyup(){
- System.gc();
- }
-
- public void JGFrun(int size){
-
-
- JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size);
-
- JGFsetsize(size);
- JGFinitialise();
- JGFkernel();
- JGFvalidate();
- JGFtidyup();
-
-
- JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (JACOBI_NUM_ITER));
-
- JGFInstrumentor.printTimer("Section2:SOR:Kernel");
- }
- */
-
- public double[][] RandomMatrix(int M, int N, Random R)
- {
- double A[][] = new double[M][N];
-
- for (int i=0; i<N; i++)
- for (int j=0; j<N; j++)
- {
- A[i][j] = R.nextDouble() * 1e-6;
- }
- return A;
- }
-
-
-}
+++ /dev/null
-/**************************************************************************
-* *
-* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
-* *
-* produced by *
-* *
-* Java Grande Benchmarking Project *
-* *
-* at *
-* *
-* Edinburgh Parallel Computing Centre *
-* *
-* email: epcc-javagrande@epcc.ed.ac.uk *
-* *
-* *
-* This version copyright (c) The University of Edinburgh, 2001. *
-* All rights reserved. *
-* *
-**************************************************************************/
-
-
-public class JGFSORBenchSizeA{
-
- public static void main(String argv[]){
-
- int nthreads;
-
- if(argv.length != 0 ) {
- nthreads = Integer.parseInt(argv[0]);
- } else {
- System.out.println("The no of threads has not been specified, defaulting to 1");
- System.out.println(" ");
- nthreads = 1;
- }
-
- JGFInstrumentor instr = new JGFInstrumentor();
- JGFInstrumentor.printHeader(2,0,nthreads);
-
- JGFSORBench sor = new JGFSORBench(nthreads,instr);
-
- int size = 0;
- JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size, instr.timers);
-
- sor.JGFsetsize(size);
- JGFSORBench.JGFkernel(sor,instr);
- sor.JGFvalidate();
-
- JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (sor.JACOBI_NUM_ITER), instr.timers);
-
- JGFInstrumentor.printTimer("Section2:SOR:Kernel", instr.timers);
-
- }
-}
-
+++ /dev/null
-/**************************************************************************
-* *
-* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
-* *
-* produced by *
-* *
-* Java Grande Benchmarking Project *
-* *
-* at *
-* *
-* Edinburgh Parallel Computing Centre *
-* *
-* email: epcc-javagrande@epcc.ed.ac.uk *
-* *
-* *
-* This version copyright (c) The University of Edinburgh, 2001. *
-* All rights reserved. *
-* *
-**************************************************************************/
-
-public class JGFSORBenchSizeB{
-
- public static void main(String argv[]){
- int nthreads;
-
- if(argv.length != 0 ) {
- nthreads = Integer.parseInt(argv[0]);
- } else {
- System.out.println("The no of threads has not been specified, defaulting to 1");
- System.out.println(" ");
- nthreads = 1;
- }
-
- JGFInstrumentor instr = new JGFInstrumentor();
- JGFInstrumentor.printHeader(2,1,nthreads);
-
- JGFSORBench sor = new JGFSORBench(nthreads,instr);
- int size = 1;
- JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size, instr.timers);
-
- sor.JGFsetsize(size);
- sor.JGFkernel();
- sor.JGFvalidate();
-
- JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (sor.JACOBI_NUM_ITER), instr.timers);
-
- JGFInstrumentor.printTimer("Section2:SOR:Kernel", instr.timers);
-
- }
-}
-
-
+++ /dev/null
-/**************************************************************************
-* *
-* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
-* *
-* produced by *
-* *
-* Java Grande Benchmarking Project *
-* *
-* at *
-* *
-* Edinburgh Parallel Computing Centre *
-* *
-* email: epcc-javagrande@epcc.ed.ac.uk *
-* *
-* *
-* This version copyright (c) The University of Edinburgh, 2001. *
-* All rights reserved. *
-* *
-**************************************************************************/
-
-public class JGFSORBenchSizeC{
-
- public static void main(String argv[]){
-
- int nthreads;
- if(argv.length != 0 ) {
- nthreads = Integer.parseInt(argv[0]);
- } else {
- System.out.println("The no of threads has not been specified, defaulting to 1");
- System.out.println(" ");
- nthreads = 1;
- }
-
- JGFInstrumentor instr = new JGFInstrumentor();
- JGFInstrumentor.printHeader(2,2,nthreads);
-
- JGFSORBench sor = new JGFSORBench(nthreads,instr);
-
- int size = 2;
- JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size, instr.timers);
-
- sor.JGFsetsize(size);
- sor.JGFkernel();
- sor.JGFvalidate();
-
- JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (sor.JACOBI_NUM_ITER), instr.timers);
-
- JGFInstrumentor.printTimer("Section2:SOR:Kernel", instr.timers);
-
- }
-}
-
-
+++ /dev/null
-/**************************************************************************
- * *
- * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
- * *
- * produced by *
- * *
- * Java Grande Benchmarking Project *
- * *
- * at *
- * *
- * Edinburgh Parallel Computing Centre *
- * *
- * email: epcc-javagrande@epcc.ed.ac.uk *
- * *
- * *
- * This version copyright (c) The University of Edinburgh, 1999. *
- * All rights reserved. *
- * *
- **************************************************************************/
-import java.util.*;
-
-public class JGFTimer {
-
- public String name;
- public String opname;
- public double time;
- public double opcount;
- public long calls;
- public int size;
-
- private long start_time;
- private boolean on;
-
- public JGFTimer(String name, String opname){
- this.size = -1;
- this.name = name;
- this.opname = opname;
- reset();
- }
-
- public JGFTimer(String name, String opname, int size){
- this.name = name;
- this.opname = opname;
- this.size = size;
- reset();
- }
-
- public JGFTimer(String name){
- this.name = name;
- this.opname = "";
- reset();
- }
-
-
-
- public void start(){
- if (on) System.out.println("Warning timer " + " was already turned on");
- on = true;
- start_time = System.currentTimeMillis();
- }
-
-
- public void stop(){
- time += (double) (System.currentTimeMillis()-start_time) / 1000.;
- if (!on) System.out.println("Warning timer " + " wasn't turned on");
- calls++;
- on = false;
- }
-
- public void addops(double count){
- opcount += count;
- }
-
- public void addtime(double added_time){
- time += added_time;
- }
-
- public void reset(){
- time = 0.0;
- calls = 0;
- opcount = 0;
- on = false;
- }
-
- public double perf(){
- return opcount / time;
- }
-
- public void longprint(){
- System.out.println("Timer Calls Time(s) Performance("+opname+"/s)");
- System.out.println(name + " " + calls + " " + time + " " + this.perf());
- }
-
- public void print(){
- if (opname.equals("")) {
- System.out.println(name + " " + time + " (s)");
- }
- else {
- if(size == 0) {
- System.out.println(name + ":SizeA" + "\t" + time + " (s) \t " + this.perf() + "\t" + " ("+opname+"/s)");
- } else if (size == 1) {
- System.out.println(name + ":SizeB" + "\t" + time + " (s) \t " + this.perf() + "\t" + " ("+opname+"/s)");
- } else if (size == 2) {
- System.out.println(name + ":SizeC" + "\t" + time + " (s) \t " + this.perf() + "\t" + " ("+opname+"/s)");
- } else{
- System.out.println(name + "\t" + time + " (s) \t " + this.perf() + "\t" + " ("+opname+"/s)");
- }
- }
- }
-
-
- public void printperf(){
-
- String name;
- name = this.name;
-
- // pad name to 40 characters
- while ( name.length() < 40 ) name = name + " ";
-
- System.out.println(name + "\t" + this.perf() + "\t"
- + " ("+opname+"/s)");
- }
-
-}
+++ /dev/null
-/**************************************************************************
-* *
-* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
-* *
-* produced by *
-* *
-* Java Grande Benchmarking Project *
-* *
-* at *
-* *
-* Edinburgh Parallel Computing Centre *
-* *
-* email: epcc-javagrande@epcc.ed.ac.uk *
-* *
-* adapted from SciMark 2.0, author Roldan Pozo (pozo@cam.nist.gov) *
-* *
-* This version copyright (c) The University of Edinburgh, 2001. *
-* All rights reserved. *
-* *
-**************************************************************************/
-
-class SORRunner extends Thread {
-
- int id,num_iterations;
- double G[][],omega;
- long sync[][];
- int nthreads;
-
- public SORRunner(int id, double omega, double G[][], int num_iterations,long[][] sync, int nthreads) {
- this.id = id;
- this.omega=omega;
- this.G=G;
- this.num_iterations=num_iterations;
- this.sync=sync;
- this.nthreads = nthreads;
- }
-
- public void run() {
-
- int M = G.length;
- int N = G[0].length;
-
- double omega_over_four = omega * 0.25;
- double one_minus_omega = 1.0 - omega;
-
- // update interior points
- //
- int Mm1 = M-1;
- int Nm1 = N-1;
-
-
- int ilow, iupper, slice, tslice, ttslice;
-
- tslice = (Mm1) / 2;
- ttslice = (tslice + nthreads-1)/nthreads;
- slice = ttslice*2;
-
- ilow=id*slice+1;
- iupper = ((id+1)*slice)+1;
- if (iupper > Mm1) iupper = Mm1+1;
- if (id == (nthreads-1)) iupper = Mm1+1;
-
- for (int p=0; p<2*num_iterations; p++) {
- for (int i=ilow+(p%2); i<iupper; i=i+2) {
-
- double [] Gi = G[i];
- double [] Gim1 = G[i-1];
-
- if(i == 1) {
- double [] Gip1 = G[i+1];
-
- for (int j=1; j<Nm1; j=j+2){
- Gi[j] = omega_over_four * (Gim1[j] + Gip1[j] + Gi[j-1]
- + Gi[j+1]) + one_minus_omega * Gi[j];
-
- }
- } else if (i == Mm1) {
-
- double [] Gim2 = G[i-2];
-
- for (int j=1; j<Nm1; j=j+2){
- if((j+1) != Nm1) {
- Gim1[j+1]=omega_over_four * (Gim2[j+1] + Gi[j+1] + Gim1[j]
- + Gim1[j+2]) + one_minus_omega * Gim1[j+1];
- }
- }
-
- } else {
-
- double [] Gip1 = G[i+1];
- double [] Gim2 = G[i-2];
-
- for (int j=1; j<Nm1; j=j+2){
- Gi[j] = omega_over_four * (Gim1[j] + Gip1[j] + Gi[j-1]
- + Gi[j+1]) + one_minus_omega * Gi[j];
-
- if((j+1) != Nm1) {
- Gim1[j+1]=omega_over_four * (Gim2[j+1] + Gi[j+1] + Gim1[j]
- + Gim1[j+2]) + one_minus_omega * Gim1[j+1];
- }
- }
- }
-
- }
-
- // Signal this thread has done iteration
- sync[id][0]++;
-
- // Wait for neighbours;
- if (id > 0) {
- while (sync[id-1][0] < sync[id][0]) ;
- }
- if (id < nthreads -1) {
- while (sync[id+1][0] < sync[id][0]) ;
- }
- }
-
- }
-}
+++ /dev/null
-SRC = JGFSORBenchSizeA
-default:
- javac ${SRC}.java
-run:
- java ${SRC} 2
-
-clean:
- rm *.class
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 1999. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+import java.util.*;
+public class JGFInstrumentor{
+
+ protected HashMap timers;
+ protected HashMap data;
+
+ public JGFInstrumentor() {
+ timers = new HashMap();
+ data = new HashMap();
+ }
+
+ public static void addTimer (String name, HashMap timers){
+
+ if (timers.containsKey(name)) {
+ System.out.println("JGFInstrumentor.addTimer: warning - timer " + name +
+ " already exists");
+ }
+ else {
+ timers.put(name, new JGFTimer(name));
+ }
+ }
+
+ public static void addTimer (String name, String opname, HashMap timers){
+
+ if (timers.containsKey(name)) {
+ System.out.println("JGFInstrumentor.addTimer: warning - timer " + name +
+ " already exists");
+ }
+ else {
+ timers.put(name, new JGFTimer(name,opname));
+ }
+
+ }
+
+ public static void addTimer (String name, String opname, int size, HashMap timers){
+
+ if (timers.containsKey(name)) {
+ System.out.println("JGFInstrumentor.addTimer: warning - timer " + name +
+ " already exists");
+ }
+ else {
+ timers.put(name, new JGFTimer(name,opname,size));
+ }
+
+ }
+
+ public static void startTimer(String name, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).start();
+ }
+ else {
+ System.out.println("JGFInstrumentor.startTimer: failed - timer " + name +
+ " does not exist");
+ }
+
+ }
+
+ public static void stopTimer(String name, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).stop();
+ }
+ else {
+ System.out.println("JGFInstrumentor.stopTimer: failed - timer " + name +
+ " does not exist");
+ }
+ }
+
+ public static void addOpsToTimer(String name, double count, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).addops(count);
+ }
+ else {
+ System.out.println("JGFInstrumentor.addOpsToTimer: failed - timer " + name +
+ " does not exist");
+ }
+ }
+
+ public static void addTimeToTimer(String name, double added_time, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).addtime(added_time);
+ }
+ else {
+ System.out.println("JGFInstrumentor.addTimeToTimer: failed - timer " + name +
+ " does not exist");
+ }
+
+
+
+ }
+
+ public static double readTimer(String name, HashMap timers){
+ double time;
+ if (timers.containsKey(name)) {
+ time = ((JGFTimer) timers.get(name)).time;
+ }
+ else {
+ System.out.println("JGFInstrumentor.readTimer: failed - timer " + name +
+ " does not exist");
+ time = 0.0;
+ }
+ return time;
+ }
+
+ public static void resetTimer(String name, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).reset();
+ }
+ else {
+ System.out.println("JGFInstrumentor.resetTimer: failed - timer " + name +
+ " does not exist");
+ }
+ }
+
+ public static void printTimer(String name, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).print();
+ }
+ else {
+ System.out.println("JGFInstrumentor.printTimer: failed - timer " + name +
+ " does not exist");
+ }
+ }
+
+ public static void printperfTimer(String name, HashMap timers){
+ if (timers.containsKey(name)) {
+ ((JGFTimer) timers.get(name)).printperf();
+ }
+ else {
+ System.out.println("JGFInstrumentor.printTimer: failed - timer " + name +
+ " does not exist");
+ }
+ }
+
+ public static void storeData(String name, Object obj, HashMap data){
+ data.put(name,obj);
+ }
+
+ public static void retrieveData(String name, Object obj, HashMap data){
+ obj = data.get(name);
+ }
+
+ /**
+  * Prints the benchmark banner followed by the thread count and an empty line.
+  *
+  * The banner names the section and, for sections 2 and 3, the problem size:
+  * section 2 knows sizes 0..2 (A, B, C) and section 3 knows sizes 0..1 (A, B).
+  * Any other section/size combination prints an empty banner line.
+  *
+  * @param section  benchmark section number
+  * @param size     problem size index
+  * @param nthreads number of threads reported in the second line
+  */
+ public static void printHeader(int section, int size,int nthreads) {
+
+     final String base = "Java Grande Forum Thread Benchmark Suite - Version 1.0 - Section ";
+     final String[] sizeLabels = { "A", "B", "C" };
+
+     String header = "";
+
+     if (section == 1) {
+         header = base + "1";
+     }
+     else if (section == 2 || section == 3) {
+         // Section 2 has three labelled sizes, section 3 only two.
+         int knownSizes = (section == 2) ? 3 : 2;
+         if (size >= 0 && size < knownSizes) {
+             header = base + section + " - Size " + sizeLabels[size];
+         }
+     }
+
+     System.out.println(header);
+
+     // Singular wording for exactly one thread, plural for everything else.
+     System.out.println("Executing on " + nthreads + (nthreads == 1 ? " thread" : " threads"));
+
+     System.out.println("");
+ }
+}
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 2001. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+
+import java.util.Random;
+
+/**
+ * Driver state and kernel for the threaded SOR benchmark.
+ *
+ * An instance holds the problem-size table, the iteration count, the random
+ * generator used to fill the grid, and the result checksum (Gtotal) that
+ * JGFvalidate() compares against reference values.
+ */
+public class JGFSORBench {
+
+    // Index into datasizes selecting the problem size (set by JGFsetsize).
+    int size;
+    // Grid edge length for each size index.
+    int[] datasizes;
+    // Passed to every SORRunner as its iteration count.
+    int JACOBI_NUM_ITER;
+    long RANDOM_SEED;
+    public int nthreads;
+    Random R;
+    // Sum of the interior grid points after the kernel has run.
+    public double Gtotal;
+    // Second dimension of the sync array built by init_sync.
+    public int cachelinesize;
+    public long sync[][];
+
+    public JGFInstrumentor instr;
+
+    /**
+     * @param nthreads number of SORRunner threads JGFkernel will start
+     * @param instr    instrumentor stored on the instance
+     */
+    public JGFSORBench(int nthreads, JGFInstrumentor instr){
+        this.nthreads = nthreads;
+        this.instr = instr;
+        datasizes = new int[3];
+        datasizes[0] = 1000;
+        datasizes[1] = 1500;
+        datasizes[2] = 2000;
+        JACOBI_NUM_ITER = 100;
+        RANDOM_SEED = 10101010;
+        R = new Random(RANDOM_SEED);
+        Gtotal = 0.0;
+        cachelinesize = 128;
+    }
+
+    /** Selects the problem size; <code>size</code> indexes datasizes and the reference values. */
+    public void JGFsetsize(int size){
+        this.size = size;
+    }
+
+    /**
+     * Builds a random square grid, runs <code>sor.nthreads</code> SORRunner
+     * threads over it while the "Section2:SOR:Kernel" timer is running, and
+     * then adds every interior grid point to <code>sor.Gtotal</code>.
+     *
+     * @param sor   benchmark instance supplying size, thread count and RNG
+     * @param instr instrumentor whose timers map holds the kernel timer
+     */
+    public static void JGFkernel(JGFSORBench sor, JGFInstrumentor instr) {
+        int numthreads = sor.nthreads;
+
+        int edge = sor.datasizes[sor.size];
+        double G[][] = sor.RandomMatrix(edge, edge, sor.R);
+        int M = G.length;
+        int N = G[0].length;
+        double omega = 1.25;
+        int num_iterations = sor.JACOBI_NUM_ITER;
+
+        // Last row / column index; the interior is 1..Mm1-1 by 1..Nm1-1.
+        int Mm1 = M-1;
+        int Nm1 = N-1;
+
+        SORRunner thobjects[] = new SORRunner[numthreads];
+        sor.sync = sor.init_sync(numthreads, sor.cachelinesize);
+
+        JGFInstrumentor.startTimer("Section2:SOR:Kernel", instr.timers);
+
+        // Start workers 1..n-1 first, then worker 0 (same order as before).
+        for (int i=1; i<numthreads; i++) {
+            thobjects[i] = new SORRunner(i,omega,G,num_iterations,sor.sync,numthreads);
+            thobjects[i].start();
+        }
+
+        thobjects[0] = new SORRunner(0,omega,G,num_iterations,sor.sync,numthreads);
+        thobjects[0].start();
+
+        // Wait for every worker, worker 0 first.
+        for (int i=0; i<numthreads; i++) {
+            try {
+                thobjects[i].join();
+            }
+            catch (InterruptedException e) {
+                // Deliberately ignored, as in the original benchmark: an
+                // interrupted join simply moves on to the next worker.
+            }
+        }
+
+        JGFInstrumentor.stopTimer("Section2:SOR:Kernel", instr.timers);
+
+        // Rows are bounded by Mm1 and columns by Nm1. The previous code used
+        // Nm1 for both, which is only correct for a square grid.
+        for (int i=1; i<Mm1; i++) {
+            for (int j=1; j<Nm1; j++) {
+                sor.Gtotal += G[i][j];
+            }
+        }
+
+    }
+
+    /**
+     * Allocates the nthreads x cachelinesize counter array handed to the
+     * workers and sets column 0 of every row to zero.
+     */
+    private long[][] init_sync(int nthreads, int cachelinesize) {
+        long sync[][] = new long [nthreads][cachelinesize];
+        for (int i = 0; i<nthreads; i++)
+            sync[i][0] = 0;
+        return sync;
+    }
+
+    /**
+     * Compares Gtotal with the reference value for the current size and prints
+     * a failure report when they differ by more than 1.0e-12.
+     */
+    public void JGFvalidate(){
+
+        double refval[] = {0.498574406322512,1.1234778980135105,1.9954895063582696};
+        double dev = Math.abs(Gtotal - refval[size]);
+        if (dev > 1.0e-12 ){
+            System.out.println("Validation failed");
+            System.out.println("Gtotal = " + Gtotal + " " + dev + " " + size);
+        }
+    }
+
+    /*
+    public void JGFtidyup(){
+        System.gc();
+    }
+
+    public void JGFrun(int size){
+
+
+        JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size);
+
+        JGFsetsize(size);
+        JGFinitialise();
+        JGFkernel();
+        JGFvalidate();
+        JGFtidyup();
+
+
+        JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (JACOBI_NUM_ITER));
+
+        JGFInstrumentor.printTimer("Section2:SOR:Kernel");
+    }
+    */
+
+    /**
+     * Returns an M x N matrix whose entries are <code>R.nextDouble() * 1e-6</code>,
+     * filled row by row.
+     *
+     * The row loop is bounded by M. It used to be bounded by N, which
+     * overran the array when M &lt; N and left rows unfilled when M &gt; N.
+     * For M == N the values drawn are unchanged.
+     */
+    public double[][] RandomMatrix(int M, int N, Random R)
+    {
+        double A[][] = new double[M][N];
+
+        for (int i=0; i<M; i++)
+            for (int j=0; j<N; j++)
+            {
+                A[i][j] = R.nextDouble() * 1e-6;
+            }
+        return A;
+    }
+
+
+}
--- /dev/null
+/**************************************************************************
+* *
+* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+* *
+* produced by *
+* *
+* Java Grande Benchmarking Project *
+* *
+* at *
+* *
+* Edinburgh Parallel Computing Centre *
+* *
+* email: epcc-javagrande@epcc.ed.ac.uk *
+* *
+* *
+* This version copyright (c) The University of Edinburgh, 2001. *
+* All rights reserved. *
+* *
+**************************************************************************/
+
+
+/**
+ * Command-line entry point that runs the SOR kernel with size index 0.
+ * The optional first argument is the number of threads; without it one
+ * thread is used and a notice is printed.
+ */
+public class JGFSORBenchSizeA{
+
+    public static void main(String argv[]){
+
+        final int sizeIndex = 0;
+        final String kernelTimer = "Section2:SOR:Kernel";
+        int threadCount;
+
+        if (argv.length == 0) {
+            System.out.println("The no of threads has not been specified, defaulting to 1");
+            System.out.println(" ");
+            threadCount = 1;
+        } else {
+            threadCount = Integer.parseInt(argv[0]);
+        }
+
+        JGFInstrumentor instrumentor = new JGFInstrumentor();
+        JGFInstrumentor.printHeader(2, sizeIndex, threadCount);
+
+        JGFSORBench bench = new JGFSORBench(threadCount, instrumentor);
+
+        JGFInstrumentor.addTimer(kernelTimer, "Iterations", sizeIndex, instrumentor.timers);
+
+        // Run and check the kernel, then report the timer.
+        bench.JGFsetsize(sizeIndex);
+        JGFSORBench.JGFkernel(bench, instrumentor);
+        bench.JGFvalidate();
+
+        JGFInstrumentor.addOpsToTimer(kernelTimer, (double) (bench.JACOBI_NUM_ITER), instrumentor.timers);
+
+        JGFInstrumentor.printTimer(kernelTimer, instrumentor.timers);
+
+    }
+}
+
--- /dev/null
+/**************************************************************************
+* *
+* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+* *
+* produced by *
+* *
+* Java Grande Benchmarking Project *
+* *
+* at *
+* *
+* Edinburgh Parallel Computing Centre *
+* *
+* email: epcc-javagrande@epcc.ed.ac.uk *
+* *
+* *
+* This version copyright (c) The University of Edinburgh, 2001. *
+* All rights reserved. *
+* *
+**************************************************************************/
+
+/**
+ * Command-line entry point that runs the SOR kernel with size index 1.
+ * The optional first argument is the number of threads; without it one
+ * thread is used and a notice is printed.
+ */
+public class JGFSORBenchSizeB{
+
+    public static void main(String argv[]){
+        int nthreads;
+
+        if(argv.length != 0 ) {
+            nthreads = Integer.parseInt(argv[0]);
+        } else {
+            System.out.println("The no of threads has not been specified, defaulting to 1");
+            System.out.println(" ");
+            nthreads = 1;
+        }
+
+        JGFInstrumentor instr = new JGFInstrumentor();
+        JGFInstrumentor.printHeader(2,1,nthreads);
+
+        JGFSORBench sor = new JGFSORBench(nthreads,instr);
+        int size = 1;
+        JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size, instr.timers);
+
+        sor.JGFsetsize(size);
+        // JGFkernel is a static method taking the benchmark and the instrumentor;
+        // the former zero-argument instance call did not match any declaration.
+        JGFSORBench.JGFkernel(sor,instr);
+        sor.JGFvalidate();
+
+        JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (sor.JACOBI_NUM_ITER), instr.timers);
+
+        JGFInstrumentor.printTimer("Section2:SOR:Kernel", instr.timers);
+
+    }
+}
+
+
--- /dev/null
+/**************************************************************************
+* *
+* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+* *
+* produced by *
+* *
+* Java Grande Benchmarking Project *
+* *
+* at *
+* *
+* Edinburgh Parallel Computing Centre *
+* *
+* email: epcc-javagrande@epcc.ed.ac.uk *
+* *
+* *
+* This version copyright (c) The University of Edinburgh, 2001. *
+* All rights reserved. *
+* *
+**************************************************************************/
+
+/**
+ * Command-line entry point that runs the SOR kernel with size index 2.
+ * The optional first argument is the number of threads; without it one
+ * thread is used and a notice is printed.
+ */
+public class JGFSORBenchSizeC{
+
+    public static void main(String argv[]){
+
+        int nthreads;
+        if(argv.length != 0 ) {
+            nthreads = Integer.parseInt(argv[0]);
+        } else {
+            System.out.println("The no of threads has not been specified, defaulting to 1");
+            System.out.println(" ");
+            nthreads = 1;
+        }
+
+        JGFInstrumentor instr = new JGFInstrumentor();
+        JGFInstrumentor.printHeader(2,2,nthreads);
+
+        JGFSORBench sor = new JGFSORBench(nthreads,instr);
+
+        int size = 2;
+        JGFInstrumentor.addTimer("Section2:SOR:Kernel", "Iterations",size, instr.timers);
+
+        sor.JGFsetsize(size);
+        // JGFkernel is a static method taking the benchmark and the instrumentor;
+        // the former zero-argument instance call did not match any declaration.
+        JGFSORBench.JGFkernel(sor,instr);
+        sor.JGFvalidate();
+
+        JGFInstrumentor.addOpsToTimer("Section2:SOR:Kernel", (double) (sor.JACOBI_NUM_ITER), instr.timers);
+
+        JGFInstrumentor.printTimer("Section2:SOR:Kernel", instr.timers);
+
+    }
+}
+
+
--- /dev/null
+/**************************************************************************
+ * *
+ * Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+ * *
+ * produced by *
+ * *
+ * Java Grande Benchmarking Project *
+ * *
+ * at *
+ * *
+ * Edinburgh Parallel Computing Centre *
+ * *
+ * email: epcc-javagrande@epcc.ed.ac.uk *
+ * *
+ * *
+ * This version copyright (c) The University of Edinburgh, 1999. *
+ * All rights reserved. *
+ * *
+ **************************************************************************/
+import java.util.*;
+
+/**
+ * A named stopwatch that accumulates elapsed wall-clock seconds, a call
+ * count and an operation count, and can print them in several formats.
+ */
+public class JGFTimer {
+
+    public String name;
+    // Unit label used in the "(opname/s)" part of the printed lines.
+    public String opname;
+    // Accumulated time in seconds.
+    public double time;
+    public double opcount;
+    // Number of completed start/stop pairs.
+    public long calls;
+    // Problem size index: 0, 1, 2 print as SizeA/B/C, anything else prints no size.
+    public int size;
+
+    private long start_time;
+    private boolean on;
+
+    /** Creates a timer with an operation name and no problem size (size = -1). */
+    public JGFTimer(String name, String opname){
+        this.size = -1;
+        this.name = name;
+        this.opname = opname;
+        reset();
+    }
+
+    /** Creates a timer with an operation name and a problem size index. */
+    public JGFTimer(String name, String opname, int size){
+        this.name = name;
+        this.opname = opname;
+        this.size = size;
+        reset();
+    }
+
+    /**
+     * Creates a timer without an operation name. The size is set to -1 like
+     * in the two-argument constructor; it used to be left at the field
+     * default 0, which is the index print() labels as SizeA.
+     */
+    public JGFTimer(String name){
+        this.size = -1;
+        this.name = name;
+        this.opname = "";
+        reset();
+    }
+
+
+
+    /** Starts timing; warns on standard output if the timer is already running. */
+    public void start(){
+        if (on) System.out.println("Warning timer " + name + " was already turned on");
+        on = true;
+        start_time = System.currentTimeMillis();
+    }
+
+
+    /**
+     * Stops timing and adds the elapsed seconds to <code>time</code>.
+     *
+     * If the timer is not running, a warning is printed and nothing is
+     * accumulated: there is no valid start time, and adding the difference to
+     * a stale or zero start_time would corrupt <code>time</code>.
+     */
+    public void stop(){
+        if (!on) {
+            System.out.println("Warning timer " + name + " wasn't turned on");
+            return;
+        }
+        time += (double) (System.currentTimeMillis()-start_time) / 1000.;
+        calls++;
+        on = false;
+    }
+
+    /** Adds <code>count</code> to the operation count. */
+    public void addops(double count){
+        opcount += count;
+    }
+
+    /** Adds <code>added_time</code> seconds to the accumulated time. */
+    public void addtime(double added_time){
+        time += added_time;
+    }
+
+    /** Clears time, call count and operation count and marks the timer as stopped. */
+    public void reset(){
+        time = 0.0;
+        calls = 0;
+        opcount = 0;
+        on = false;
+    }
+
+    /**
+     * Returns operations per second (opcount / time). With time == 0 this is
+     * a floating-point division by zero, giving Infinity or NaN.
+     */
+    public double perf(){
+        return opcount / time;
+    }
+
+    /** Prints a two-line report: a column header, then name, calls, time and performance. */
+    public void longprint(){
+        System.out.println("Timer Calls Time(s) Performance("+opname+"/s)");
+        System.out.println(name + " " + calls + " " + time + " " + this.perf());
+    }
+
+    /**
+     * Prints a one-line report. Without an operation name only name and time
+     * are shown; otherwise the name (suffixed with :SizeA/:SizeB/:SizeC for
+     * size 0/1/2), the time and the performance are shown.
+     */
+    public void print(){
+        if (opname.equals("")) {
+            System.out.println(name + " " + time + " (s)");
+            return;
+        }
+
+        // Only the label differs between the size variants.
+        String label = name;
+        if (size == 0) {
+            label = name + ":SizeA";
+        } else if (size == 1) {
+            label = name + ":SizeB";
+        } else if (size == 2) {
+            label = name + ":SizeC";
+        }
+
+        System.out.println(label + "\t" + time + " (s) \t " + this.perf() + "\t" + " ("+opname+"/s)");
+    }
+
+
+    /** Prints the name padded with spaces to at least 40 characters, then the performance. */
+    public void printperf(){
+
+        String name;
+        name = this.name;
+
+        // pad name to 40 characters
+        while ( name.length() < 40 ) name = name + " ";
+
+        System.out.println(name + "\t" + this.perf() + "\t"
+                           + " ("+opname+"/s)");
+    }
+
+}
--- /dev/null
+/**************************************************************************
+* *
+* Java Grande Forum Benchmark Suite - Thread Version 1.0 *
+* *
+* produced by *
+* *
+* Java Grande Benchmarking Project *
+* *
+* at *
+* *
+* Edinburgh Parallel Computing Centre *
+* *
+* email: epcc-javagrande@epcc.ed.ac.uk *
+* *
+* adapted from SciMark 2.0, author Roldan Pozo (pozo@cam.nist.gov) *
+* *
+* This version copyright (c) The University of Edinburgh, 2001. *
+* All rights reserved. *
+* *
+**************************************************************************/
+
+class SORRunner extends Thread { // worker thread that repeatedly relaxes one band of rows of the shared grid G
+ // Worker state, filled in by the constructor; run() reads these and increments sync[id][0].
+ int id,num_iterations; // id selects this worker's row band and its sync row; run() performs 2*num_iterations passes
+ double G[][],omega; // G is updated in place; omega is the relaxation weight used in the update formula
+ long sync[][]; // per-worker progress counters; this class only touches column 0 of each row
+ int nthreads; // number of workers the rows are divided between
+ // Stores the arguments as given; G and sync are kept by reference, not copied.
+ public SORRunner(int id, double omega, double G[][], int num_iterations,long[][] sync, int nthreads) {
+ this.id = id;
+ this.omega=omega;
+ this.G=G;
+ this.num_iterations=num_iterations;
+ this.sync=sync;
+ this.nthreads = nthreads;
+ }
+ // Thread body: sweeps this worker's rows 2*num_iterations times, waiting for the neighbouring workers after each pass.
+ public void run() {
+
+ int M = G.length; // number of rows
+ int N = G[0].length; // number of columns, taken from row 0
+ // Weights of the update: new = omega/4 * (sum of the four neighbours) + (1 - omega) * old.
+ double omega_over_four = omega * 0.25;
+ double one_minus_omega = 1.0 - omega;
+
+ // Mm1 and Nm1 are the last row and column index; the column loops below stop
+ // before Nm1, and row 0 / row Mm1 are never written (see the i == 1 and i == Mm1 cases).
+ int Mm1 = M-1;
+ int Nm1 = N-1;
+
+ // Row band of this worker: [ilow, iupper).
+ int ilow, iupper, slice, tslice, ttslice;
+
+ tslice = (Mm1) / 2; // half of Mm1, rounded down
+ ttslice = (tslice + nthreads-1)/nthreads; // tslice divided by nthreads, rounded up
+ slice = ttslice*2; // rows per worker, always an even number
+
+ ilow=id*slice+1; // bands start at row 1
+ iupper = ((id+1)*slice)+1;
+ if (iupper > Mm1) iupper = Mm1+1; // clamp so that i never exceeds Mm1
+ if (id == (nthreads-1)) iupper = Mm1+1; // the last worker always runs up to and including row Mm1
+ // Each pass p visits every second row of the band; the starting row alternates with p%2.
+ for (int p=0; p<2*num_iterations; p++) {
+ for (int i=ilow+(p%2); i<iupper; i=i+2) {
+
+ double [] Gi = G[i];
+ double [] Gim1 = G[i-1];
+ // Three cases: first interior row, last row, and everything in between.
+ if(i == 1) {
+ double [] Gip1 = G[i+1];
+ // i == 1: only row i is updated (odd columns); row i-1 is row 0 and is left alone.
+ for (int j=1; j<Nm1; j=j+2){
+ Gi[j] = omega_over_four * (Gim1[j] + Gip1[j] + Gi[j-1]
+ + Gi[j+1]) + one_minus_omega * Gi[j];
+
+ }
+ } else if (i == Mm1) {
+ // i == Mm1: row i itself is not written; only row i-1 is updated, at columns j+1.
+ double [] Gim2 = G[i-2];
+
+ for (int j=1; j<Nm1; j=j+2){
+ if((j+1) != Nm1) { // skip column Nm1, the last column
+ Gim1[j+1]=omega_over_four * (Gim2[j+1] + Gi[j+1] + Gim1[j]
+ + Gim1[j+2]) + one_minus_omega * Gim1[j+1];
+ }
+ }
+ // General case: update row i at columns j and row i-1 at columns j+1 in the same column loop.
+ } else {
+
+ double [] Gip1 = G[i+1];
+ double [] Gim2 = G[i-2];
+ // The order of the two assignments matters: the row i-1 update reads Gi[j+1] and Gim1[j], which this loop may already have changed.
+ for (int j=1; j<Nm1; j=j+2){
+ Gi[j] = omega_over_four * (Gim1[j] + Gip1[j] + Gi[j-1]
+ + Gi[j+1]) + one_minus_omega * Gi[j];
+
+ if((j+1) != Nm1) { // skip column Nm1, the last column
+ Gim1[j+1]=omega_over_four * (Gim2[j+1] + Gi[j+1] + Gim1[j]
+ + Gim1[j+2]) + one_minus_omega * Gim1[j+1];
+ }
+ }
+ }
+
+ }
+
+ // Signal that this thread has finished pass p by bumping its own counter
+ sync[id][0]++;
+ // NOTE(review): the counters are plain long elements read in empty spin loops with no volatile/synchronized access in this class, so visibility of other threads' increments is not guaranteed by the Java memory model; confirm this is acceptable on the target runtime
+ // Busy-wait until each existing neighbour's counter has caught up with ours
+ if (id > 0) {
+ while (sync[id-1][0] < sync[id][0]) ;
+ }
+ if (id < nthreads -1) {
+ while (sync[id+1][0] < sync[id][0]) ;
+ }
+ }
+
+ }
+}
--- /dev/null
+# Builds the threaded SOR benchmark with the project build script.
+#
+# MAINCLASS and SRC previously named JGFMolDynBenchSizeD / JGFMolDynBench.java,
+# which are not among the SOR sources added next to this makefile. They now
+# name the SOR driver and kernel. Override the driver on the command line to
+# build another size, e.g. `make MAINCLASS=JGFSORBenchSizeB`.
+MAINCLASS=JGFSORBenchSizeA
+SRC=${MAINCLASS}.java \
+JGFInstrumentor.java \
+JGFTimer.java \
+JGFSORBench.java \
+SORRunner.java
+
+FLAGS2= -thread -optimize -mainclass ${MAINCLASS} -o ${MAINCLASS}NP
+
+# Neither target produces a file of its own name.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS2} ${SRC}
+
+clean:
+	rm -rf tmpbuilddirectory/
+	rm -f *.bin