Robust/src/Benchmarks/Prefetch/Crypt/dsm/JGFCryptBenchSizeA.java

   1 public class JGFCryptBenchSizeA extends Thread{
   2   JGFCryptBench cb;
   3   int id,key[];
   4   byte text1[],text2[];
   5   int nthreads;
   6
   7   public JGFCryptBenchSizeA(JGFCryptBench cb, int id, byte [] text1, byte [] text2, int [] key, int nthreads) {
   8     this.cb = cb;
   9     this.id = id;
  10     this.text1=text1;
  11     this.text2=text2;
  12     this.key=key;
  13     this.nthreads = nthreads;
  14   }
  15
  16   // run()
  17   //
  18   // IDEA encryption/decryption algorithm. It processes plaintext in
  19   // 64-bit blocks, one at a time, breaking the block into four 16-bit
  20   // unsigned subblocks. It goes through eight rounds of processing
  21   // using 6 new subkeys each time, plus four for last step. The source
  22   // text is in array text1, the destination text goes into array text2
  23   // The routine represents 16-bit subblocks and subkeys as type int so
  24   // that they can be treated more easily as unsigned. Multiplication
  25   // modulo 0x10001 interprets a zero sub-block as 0x10000; it must to
  26   // fit in 16 bits.
  27   //
  28
  29   public void run() {
  30     int ilow, iupper, slice, tslice, ttslice;
  31
  32     atomic {
  33       tslice = text1.length / 8;
  34       ttslice = (tslice + nthreads-1) / nthreads;
  35       slice = ttslice*8;
  36
  37       ilow = id*slice;
  38       iupper = (id+1)*slice;
  39       if(iupper > text1.length) iupper = text1.length;
  40     }
  41
  42     int i1 = ilow;                 // Index into first text array.
  43     int i2 = ilow;                 // Index into second text array.
  44     int ik;                     // Index into key array.
  45     int x1, x2, x3, x4, t1, t2; // Four "16-bit" blocks, two temps.
  46     int r;                      // Eight rounds of processing.
  47
  48     for (int i =ilow ; i <iupper ; i +=8)
  49     {
  50
  51       ik = 0;                 // Restart key index.
  52       r = 8;                  // Eight rounds of processing.
  53
  54       // Load eight plain1 bytes as four 16-bit "unsigned" integers.
  55       // Masking with 0xff prevents sign extension with cast to int.
  56
  57       atomic {
  58         x1 = text1[i1++] & 0xff;          // Build 16-bit x1 from 2 bytes,
  59         x1 |= (text1[i1++] & 0xff) << 8;  // assuming low-order byte first.
  60         x2 = text1[i1++] & 0xff;
  61         x2 |= (text1[i1++] & 0xff) << 8;
  62         x3 = text1[i1++] & 0xff;
  63         x3 |= (text1[i1++] & 0xff) << 8;
  64         x4 = text1[i1++] & 0xff;
  65         x4 |= (text1[i1++] & 0xff) << 8;
  66       }
  67
  68       do {
  69         // 1) Multiply (modulo 0x10001), 1st text sub-block
  70         // with 1st key sub-block.
  71
  72         atomic {
  73           x1 = (int) ((long) x1 * key[ik++] % 0x10001L & 0xffff);
  74           // 2) Add (modulo 0x10000), 2nd text sub-block
  75           // with 2nd key sub-block.
  76
  77           x2 = x2 + key[ik++] & 0xffff;
  78
  79           // 3) Add (modulo 0x10000), 3rd text sub-block
  80           // with 3rd key sub-block.
  81
  82           x3 = x3 + key[ik++] & 0xffff;
  83
  84           // 4) Multiply (modulo 0x10001), 4th text sub-block
  85           // with 4th key sub-block.
  86
  87           x4 = (int) ((long) x4 * key[ik++] % 0x10001L & 0xffff);
  88         }
  89
  90         // 5) XOR results from steps 1 and 3.
  91
  92         t2 = x1 ^ x3;
  93
  94         // 6) XOR results from steps 2 and 4.
  95         // Included in step 8.
  96
  97         // 7) Multiply (modulo 0x10001), result of step 5
  98         // with 5th key sub-block.
  99
 100         atomic {
 101           t2 = (int) ((long) t2 * key[ik++] % 0x10001L & 0xffff);
 102         }
 103
 104         // 8) Add (modulo 0x10000), results of steps 6 and 7.
 105
 106         t1 = t2 + (x2 ^ x4) & 0xffff;
 107
 108         // 9) Multiply (modulo 0x10001), result of step 8
 109         // with 6th key sub-block.
 110
 111         atomic {
 112           t1 = (int) ((long) t1 * key[ik++] % 0x10001L & 0xffff);
 113         }
 114
 115         // 10) Add (modulo 0x10000), results of steps 7 and 9.
 116
 117         t2 = t1 + t2 & 0xffff;
 118
 119         // 11) XOR results from steps 1 and 9.
 120
 121         x1 ^= t1;
 122
 123         // 14) XOR results from steps 4 and 10. (Out of order).
 124
 125         x4 ^= t2;
 126
 127         // 13) XOR results from steps 2 and 10. (Out of order).
 128
 129         t2 ^= x2;
 130
 131         // 12) XOR results from steps 3 and 9. (Out of order).
 132
 133         x2 = x3 ^ t1;
 134
 135         x3 = t2;        // Results of x2 and x3 now swapped.
 136
 137       } while(--r != 0);  // Repeats seven more rounds.
 138
 139       // Final output transform (4 steps).
 140
 141       // 1) Multiply (modulo 0x10001), 1st text-block
 142       // with 1st key sub-block.
 143
 144       atomic {
 145         x1 = (int) ((long) x1 * key[ik++] % 0x10001L & 0xffff);
 146
 147         // 2) Add (modulo 0x10000), 2nd text sub-block
 148         // with 2nd key sub-block. It says x3, but that is to undo swap
 149         // of subblocks 2 and 3 in 8th processing round.
 150
 151         x3 = x3 + key[ik++] & 0xffff;
 152
 153         // 3) Add (modulo 0x10000), 3rd text sub-block
 154         // with 3rd key sub-block. It says x2, but that is to undo swap
 155         // of subblocks 2 and 3 in 8th processing round.
 156
 157         x2 = x2 + key[ik++] & 0xffff;
 158
 159         // 4) Multiply (modulo 0x10001), 4th text-block
 160         // with 4th key sub-block.
 161
 162         x4 = (int) ((long) x4 * key[ik++] % 0x10001L & 0xffff);
 163
 164         // Repackage from 16-bit sub-blocks to 8-bit byte array text2.
 165
 166         text2[i2++] = (byte) x1;
 167         text2[i2++] = (byte) (x1 >>> 8);
 168         text2[i2++] = (byte) x3;                // x3 and x2 are switched
 169         text2[i2++] = (byte) (x3 >>> 8);        // only in name.
 170         text2[i2++] = (byte) x2;
 171         text2[i2++] = (byte) (x2 >>> 8);
 172         text2[i2++] = (byte) x4;
 173         text2[i2++] = (byte) (x4 >>> 8);
 174       } //End of atomic
 175
 176     }   // End for loop.
 177
 178   }   // End routine.
 179
 180   public static void main(String argv[]){
 181     int nthreads;
 182     if(argv.length != 0 ) {
 183       nthreads = Integer.parseInt(argv[0]);
 184     } else {
 185       System.printString("The no of threads has not been specified, defaulting to 1");
 186       System.printString("  ");
 187       nthreads = 1;
 188     }
 189
 190     /* Instruments output messages */
 191     JGFInstrumentor instr = new JGFInstrumentor();
 192     instr.printHeader(2,0,nthreads);
 193
 194     JGFCryptBench cb;
 195     int size = 0;
 196     instr.addTimer("Section2:Crypt:Kernel", "Kbyte",size);
 197     atomic {
 198       cb = global new JGFCryptBench();
 199       cb.JGFsetsize(size);
 200       cb.JGFinitialise();
 201     }
 202
 203
 204     /* Start computation */
 205     int mid = (128<<24)|(195<<16)|(175<<8)|73;
 206
 207     JGFCryptBenchSizeA[] th;
 208     atomic {
 209       th = global new JGFCryptBenchSizeA [nthreads];
 210     }
 211
 212     // Start the stopwatch.
 213     instr.startTimer("Section2:Crypt:Kernel");
 214
 215     // Encrypt plain1.
 216     JGFCryptBenchSizeA tmp;
 217     for(int i=1;i<nthreads;i++) {
 218       atomic {
 219         th[i] = global new JGFCryptBenchSizeA(cb, i, cb.plain1, cb.crypt1, cb.Z, nthreads);
 220         tmp = th[i];
 221       }
 222       tmp.start(mid);
 223     }
 224
 225     atomic {
 226       th[0] = global new JGFCryptBenchSizeA(cb, 0, cb.plain1, cb.crypt1, cb.Z, nthreads);
 227       tmp = th[0];
 228     }
 229     tmp.start(mid);
 230
 231
 232     for(int i=1;i<nthreads;i++) {
 233       atomic {
 234         tmp = th[i];
 235       }
 236       tmp.join();
 237     }
 238
 239     // Decrypt.
 240     for(int i=1;i<nthreads;i++) {
 241       atomic {
 242         th[i] = global new JGFCryptBenchSizeA(cb, i, cb.crypt1, cb.plain2, cb.DK, nthreads);
 243         tmp = th[i];
 244       }
 245       tmp.start(mid);
 246     }
 247
 248     atomic {
 249       th[0] = global new JGFCryptBenchSizeA(cb, 0, cb.crypt1, cb.plain2, cb.DK, nthreads);
 250       tmp = th[0];
 251     }
 252     tmp.start(mid);
 253
 254
 255     for(int i=1;i<nthreads;i++) {
 256       atomic {
 257         tmp = th[i];
 258       }
 259       tmp.join();
 260     }
 261
 262     // Stop the stopwatch.
 263     instr.stopTimer("Section2:Crypt:Kernel");
 264
 265     atomic {
 266       cb.JGFvalidate();
 267       cb.JGFtidyup();
 268     }
 269
 270     int arows;
 271     atomic {
 272       arows = cb.array_rows;
 273     }
 274
 275     instr.addOpsToTimer("Section2:Crypt:Kernel", (2*arows)/1000.);
 276     instr.printTimer("Section2:Crypt:Kernel");
 277
 278     System.printString("Done\n");
 279   }
 280 }
 281