Robust/src/Benchmarks/Prefetch/Crypt/dsm/crypt/IDEATest.java

   1 /**************************************************************************
   2 *                                                                         *
   3 *         Java Grande Forum Benchmark Suite - Thread Version 1.0          *
   4 *                                                                         *
   5 *                            produced by                                  *
   6 *                                                                         *
   7 *                  Java Grande Benchmarking Project                       *
   8 *                                                                         *
   9 *                                at                                       *
  10 *                                                                         *
  11 *                Edinburgh Parallel Computing Centre                      *
  12 *                                                                         *
  13 *                email: epcc-javagrande@epcc.ed.ac.uk                     *
  14 *                                                                         *
  15 *                  Original version of this code by                       *
  16 *                 Gabriel Zachmann (zach@igd.fhg.de)                      *
  17 *                                                                         *
  18 *      This version copyright (c) The University of Edinburgh, 2001.      *
  19 *                         All rights reserved.                            *
  20 *                                                                         *
  21 **************************************************************************/
  22 /**************************************************************************
  23 *                       Ported for DSTM Benchmark                         *
  24 **************************************************************************/
  25
  26
  27 /**
  28  * Class IDEATest
  29  *
  30  * This test performs IDEA encryption then decryption. IDEA stands
  31  * for International Data Encryption Algorithm. The test is based
  32  * on code presented in Applied Cryptography by Bruce Schneier,
  33  * which was based on code developed by Xuejia Lai and James L.
  34  * Massey.
  35
  36  **/
  37
  38 //package crypt;
  39
  40 //import java.util.*;
  41 //import jgfutil.*;
  42 /*
  43
  44 class IDEATest
  45 {
  46
  47     // Declare class data. Byte buffer plain1 holds the original
  48     // data for encryption, crypt1 holds the encrypted data, and
  49     // plain2 holds the decrypted data, which should match plain1
  50     // byte for byte.
  51
  52     int array_rows;
  53
  54     byte [] plain1;       // Buffer for plaintext data.
  55     byte [] crypt1;       // Buffer for encrypted data.
  56     byte [] plain2;       // Buffer for decrypted data.
  57
  58     short [] userkey;     // Key for encryption/decryption.
  59     int [] Z;             // Encryption subkey (userkey derived).
  60     int [] DK;            // Decryption subkey (userkey derived).
  61
  62     void Do(int nthreads)
  63     {
  64
  65         int mid = (128<<24)|(195<<16)|(175<<8)|73;
  66
  67         IDEARunner[] th;
  68         atomic {
  69             th = global new IDEARunner [nthreads];
  70         }
  71
  72         // Start the stopwatch.
  73         //instr.startTimer("Section2:Crypt:Kernel");
  74
  75         // Encrypt plain1.
  76         IDEARunner tmp;
  77         for(int i=1;i<nthreads;i++) {
  78             atomic {
  79                 th[i] = global new IDEARunner(i,plain1,crypt1,Z,nthreads);
  80                 tmp = th[i];
  81             }
  82             tmp.start(mid);
  83         }
  84
  85         atomic {
  86             th[0] = global new IDEARunner(0,plain1,crypt1,Z,nthreads);
  87             tmp = th[0];
  88         }
  89         tmp.start(mid);
  90
  91
  92         for(int i=1;i<nthreads;i++) {
  93             atomic {
  94                 tmp = th[i];
  95             }
  96             tmp.join();
  97         }
  98
  99         // Decrypt.
 100         for(int i=1;i<nthreads;i++) {
 101             atomic {
 102                 th[i] = global new IDEARunner(i,crypt1,plain2,DK,nthreads);
 103                 tmp = th[i];
 104             }
 105             tmp.start(mid);
 106         }
 107
 108         atomic {
 109             th[0] = global new IDEARunner(0,crypt1,plain2,DK,nthreads);
 110             tmp = th[0];
 111         }
 112         tmp.start(mid);
 113
 114
 115         for(int i=1;i<nthreads;i++) {
 116             atomic {
 117                 tmp = th[i];
 118             }
 119             tmp.join();
 120         }
 121
 122
 123         // Stop the stopwatch.
 124         //instr.stopTimer("Section2:Crypt:Kernel");
 125
 126     }
 127
 128     //
 129     // buildTestData
 130     //Builds the data used for the test -- each time the test is run.
 131
 132
 133     void buildTestData()
 134     {
 135
 136
 137         // Create three byte arrays that will be used (and reused) for
 138         // encryption/decryption operations.
 139
 140
 141         plain1 = global new byte [array_rows];
 142         crypt1 = global new byte [array_rows];
 143         plain2 = global new byte [array_rows];
 144
 145
 146         Random rndnum = global new Random(136506717L);  // Create random number generator.
 147
 148
 149         // Allocate three arrays to hold keys: userkey is the 128-bit key.
 150         // Z is the set of 16-bit encryption subkeys derived from userkey,
 151         // while DK is the set of 16-bit decryption subkeys also derived
 152         // from userkey. NOTE: The 16-bit values are stored here in
 153         // 32-bit int arrays so that the values may be used in calculations
 154         // as if they are unsigned. Each 64-bit block of plaintext goes
 155         // through eight processing rounds involving six of the subkeys
 156         // then a final output transform with four of the keys; (8 * 6)
 157         // + 4 = 52 subkeys.
 158
 159         userkey = global new short [8];  // User key has 8 16-bit shorts.
 160         Z = global new int [52];         // Encryption subkey (user key derived).
 161         DK = global new int [52];        // Decryption subkey (user key derived).
 162
 163         // Generate user key randomly; eight 16-bit values in an array.
 164
 165         for (int i = 0; i < 8; i++)
 166         {
 167             // Again, the random number function returns int. Converting
 168             // to a short type preserves the bit pattern in the lower 16
 169             // bits of the int and discards the rest.
 170
 171             userkey[i] = (short) rndnum.nextInt();
 172         }
 173
 174         // Compute encryption and decryption subkeys.
 175
 176         calcEncryptKey();
 177         calcDecryptKey();
 178
 179         // Fill plain1 with "text."
 180         for (int i = 0; i < array_rows; i++)
 181         {
 182             plain1[i] = (byte) i;
 183
 184             // Converting to a byte
 185             // type preserves the bit pattern in the lower 8 bits of the
 186             // int and discards the rest.
 187         }
 188     }
 189
 190
 191     // calcEncryptKey
 192
 193     // Builds the 52 16-bit encryption subkeys Z[] from the user key and
 194     // stores them in a 32-bit int array. The routine corrects an error in
 195     // the source code in the Schneier book. Basically, the sense of the 7-
 196     // and 9-bit shifts is reversed there. It still works reversed, but the
 197     // encrypted data would not decrypt with someone else's IDEA code.
 198     //
 199
 200     private void calcEncryptKey()
 201     {
 202         int j;                       // Utility variable.
 203
 204         for (int i = 0; i < 52; i++) // Zero out the 52-int Z array.
 205             Z[i] = 0;
 206
 207         for (int i = 0; i < 8; i++)  // First 8 subkeys are userkey itself.
 208         {
 209             Z[i] = userkey[i] & 0xffff;     // Convert "unsigned"
 210             // short to int.
 211         }
 212
 213         // Each set of 8 subkeys thereafter is derived from left rotating
 214         // the whole 128-bit key 25 bits to left (once between each set of
 215         // eight keys and then before the last four). Instead of actually
 216         // rotating the whole key, this routine just grabs the 16 bits
 217         // that are 25 bits to the right of the corresponding subkey
 218         // eight positions below the current subkey. That 16-bit extent
 219         // straddles two array members, so bits are shifted left in one
 220         // member and right (with zero fill) in the other. For the last
 221         // two subkeys in any group of eight, those 16 bits start to
 222         // wrap around to the first two members of the previous eight.
 223
 224         for (int i = 8; i < 52; i++)
 225         {
 226             int flag1 = 0;
 227             j = i % 8;
 228             if (j < 6)
 229             {
 230                 Z[i] = ((Z[i -7]>>>9) | (Z[i-6]<<7)) // Shift and combine.
 231                     & 0xFFFF;                    // Just 16 bits.
 232                 //continue;                            // Next iteration.
 233                 flag1 = 1;
 234             }
 235
 236             if(flag1 == 0) {
 237                 int flag2 = 0;
 238
 239                 if (j == 6)    // Wrap to beginning for second chunk.
 240                 {
 241                     Z[i] = ((Z[i -7]>>>9) | (Z[i-14]<<7))
 242                         & 0xFFFF;
 243                     //continue;
 244                     flag2 = 1;
 245                 }
 246
 247                 if(flag2 == 0) {
 248                     // j == 7 so wrap to beginning for both chunks.
 249                     Z[i] = ((Z[i -15]>>>9) | (Z[i-14]<<7))
 250                         & 0xFFFF;
 251                 }
 252             }
 253         }
 254     }
 255
 256     //
 257     //calcDecryptKey
 258     //
 259     //Builds the 52 16-bit decryption subkeys DK[] from the encryption-
 260     //subkeys Z[]. DK[] is a 32-bit int array holding 16-bit values as
 261     //unsigned.
 262     //
 263
 264     private void calcDecryptKey()
 265     {
 266         int j, k;                 // Index counters.
 267         int t1, t2, t3;           // Temps to hold decrypt subkeys.
 268
 269         t1 = inv(Z[0]);           // Multiplicative inverse (mod x10001).
 270         t2 = - Z[1] & 0xffff;     // Additive inverse, 2nd encrypt subkey.
 271         t3 = - Z[2] & 0xffff;     // Additive inverse, 3rd encrypt subkey.
 272
 273         DK[51] = inv(Z[3]);       // Multiplicative inverse (mod x10001).
 274         DK[50] = t3;
 275         DK[49] = t2;
 276         DK[48] = t1;
 277
 278         j = 47;                   // Indices into temp and encrypt arrays.
 279         k = 4;
 280         for (int i = 0; i < 7; i++)
 281         {
 282             t1 = Z[k++];
 283             DK[j--] = Z[k++];
 284             DK[j--] = t1;
 285             t1 = inv(Z[k++]);
 286             t2 = -Z[k++] & 0xffff;
 287             t3 = -Z[k++] & 0xffff;
 288             DK[j--] = inv(Z[k++]);
 289             DK[j--] = t2;
 290             DK[j--] = t3;
 291             DK[j--] = t1;
 292         }
 293
 294         t1 = Z[k++];
 295         DK[j--] = Z[k++];
 296         DK[j--] = t1;
 297         t1 = inv(Z[k++]);
 298         t2 = -Z[k++] & 0xffff;
 299         t3 = -Z[k++] & 0xffff;
 300         DK[j--] = inv(Z[k++]);
 301         DK[j--] = t3;
 302         DK[j--] = t2;
 303         DK[j--] = t1;
 304     }
 305
 306
 307
 308
 309
 310     //
 311     //mul
 312     //
 313     // Performs multiplication, modulo (2**16)+1. This code is structured
 314     // on the assumption that untaken branches are cheaper than taken
 315     // branches, and that the compiler doesn't schedule branches.
 316     // Java: Must work with 32-bit int and one 64-bit long to keep
 317     // 16-bit values and their products "unsigned." The routine assumes
 318     // that both a and b could fit in 16 bits even though they come in
 319     // as 32-bit ints. Lots of "& 0xFFFF" masks here to keep things 16-bit.
 320     // Also, because the routine stores mod (2**16)+1 results in a 2**16
 321     // space, the result is truncated to zero whenever the result would
 322     // be 2**16. And if one of the multiplicands is 0, the result
 323     // is not zero, but (2**16) + 1 minus the other multiplicand (sort
 324     // of an additive inverse mod 0x10001).
 325
 326     // NOTE: The java conversion of this routine works correctly, but
 327     // is half the speed of using Java's modulus division function (%)
 328     // on the multiplication with a 16-bit masking of the result--running
 329     // in the Symantec Cafe IDE. So it's not called for now; the test
 330     // uses Java % instead.
 331     //
 332
 333     private int mul(int a, int b)
 334     {
 335         int ret;
 336         long p;             // Large enough to catch 16-bit multiply
 337         // without hitting sign bit.
 338         if (a != 0)
 339         {
 340             if(b != 0)
 341             {
 342                 p = (long) a * b;
 343                 b = (int) p & 0xFFFF;       // Lower 16 bits.
 344                 a = (int) p >>> 16;         // Upper 16 bits.
 345                 if (b < a)
 346                     return (b - a + 1) & 0xFFFF;
 347                 else
 348                     return (b - a) & 0xFFFF;
 349             }
 350             else
 351                 return ((1 - a) & 0xFFFF);  // If b = 0, then same as
 352             // 0x10001 - a.
 353         }
 354         else                                // If a = 0, then return
 355             return((1 - b) & 0xFFFF);       // same as 0x10001 - b.
 356     }
 357
 358     //
 359     // inv
 360     //
 361     // Compute multiplicative inverse of x, modulo (2**16)+1 using
 362     // extended Euclid's GCD (greatest common divisor) algorithm.
 363     // It is unrolled twice to avoid swapping the meaning of
 364     // the registers. And some subtracts are changed to adds.
 365     // Java: Though it uses signed 32-bit ints, the interpretation
 366     // of the bits within is strictly unsigned 16-bit.
 367     //
 368
 369     private int inv(int x)
 370     {
 371         int t0, t1;
 372         int q, y;
 373
 374         if (x <= 1)             // Assumes positive x.
 375             return(x);          // 0 and 1 are self-inverse.
 376
 377         t1 = 0x10001 / x;       // (2**16+1)/x; x is >= 2, so fits 16 bits.
 378         y = 0x10001 % x;
 379         if (y == 1)
 380             return((1 - t1) & 0xFFFF);
 381
 382         t0 = 1;
 383         do {
 384             q = x / y;
 385             x = x % y;
 386             t0 += q * t1;
 387             if (x == 1) return(t0);
 388             q = y / x;
 389             y = y % x;
 390             t1 += q * t0;
 391         } while (y != 1);
 392
 393         return((1 - t1) & 0xFFFF);
 394     }
 395
 396     //
 397     // freeTestData
 398     //
 399     // Nulls arrays and forces garbage collection to free up memory.
 400     //
 401
 402     void freeTestData(int array_rows)
 403     {
 404         for(int i = 0; i<array_rows; i++) {
 405             plain1[i] = (byte) 0;
 406             crypt1[i] = (byte) 0;
 407             plain2[i] = (byte) 0;
 408         }
 409
 410         for(int i = 0; i<8; i++) {
 411             userkey[i] = (short) 0;
 412         }
 413
 414         for(int i = 0; i<52; i++) {
 415             Z[i] = 0;
 416             DK[i] = 0;
 417         }
 418
 419         //System.gc();                // Force garbage collection.
 420     }
 421
 422 }
 423 */
 424
 425
 426 public class IDEARunner extends Thread {
 427
 428     int id,key[];
 429     byte text1[],text2[];
 430     int nthreads;
 431
 432     public IDEARunner(int id, byte [] text1, byte [] text2, int [] key, int nthreads) {
 433         this.id = id;
 434         this.text1=text1;
 435         this.text2=text2;
 436         this.key=key;
 437         this.nthreads = nthreads;
 438     }
 439     //
 440     // run()
 441     //
 442     // IDEA encryption/decryption algorithm. It processes plaintext in
 443     // 64-bit blocks, one at a time, breaking the block into four 16-bit
 444     // unsigned subblocks. It goes through eight rounds of processing
 445     // using 6 new subkeys each time, plus four for last step. The source
 446     // text is in array text1, the destination text goes into array text2
 447     // The routine represents 16-bit subblocks and subkeys as type int so
 448     // that they can be treated more easily as unsigned. Multiplication
 449     // modulo 0x10001 interprets a zero sub-block as 0x10000; it must to
 450     // fit in 16 bits.
 451     //
 452
 453     public void run() {
 454         int ilow, iupper, slice, tslice, ttslice;
 455
 456         tslice = text1.length / 8;
 457         ttslice = (tslice + nthreads-1) / nthreads;
 458         slice = ttslice*8;
 459
 460         ilow = id*slice;
 461         iupper = (id+1)*slice;
 462         if(iupper > text1.length) iupper = text1.length;
 463
 464         int i1 = ilow;                 // Index into first text array.
 465         int i2 = ilow;                 // Index into second text array.
 466         int ik;                     // Index into key array.
 467         int x1, x2, x3, x4, t1, t2; // Four "16-bit" blocks, two temps.
 468         int r;                      // Eight rounds of processing.
 469
 470         for (int i =ilow ; i <iupper ; i +=8)
 471         {
 472
 473             ik = 0;                 // Restart key index.
 474             r = 8;                  // Eight rounds of processing.
 475
 476             // Load eight plain1 bytes as four 16-bit "unsigned" integers.
 477             // Masking with 0xff prevents sign extension with cast to int.
 478
 479             x1 = text1[i1++] & 0xff;          // Build 16-bit x1 from 2 bytes,
 480             x1 |= (text1[i1++] & 0xff) << 8;  // assuming low-order byte first.
 481             x2 = text1[i1++] & 0xff;
 482             x2 |= (text1[i1++] & 0xff) << 8;
 483             x3 = text1[i1++] & 0xff;
 484             x3 |= (text1[i1++] & 0xff) << 8;
 485             x4 = text1[i1++] & 0xff;
 486             x4 |= (text1[i1++] & 0xff) << 8;
 487
 488             do {
 489                 // 1) Multiply (modulo 0x10001), 1st text sub-block
 490                 // with 1st key sub-block.
 491
 492                 x1 = (int) ((long) x1 * key[ik++] % 0x10001L & 0xffff);
 493
 494                 // 2) Add (modulo 0x10000), 2nd text sub-block
 495                 // with 2nd key sub-block.
 496
 497                 x2 = x2 + key[ik++] & 0xffff;
 498
 499                 // 3) Add (modulo 0x10000), 3rd text sub-block
 500                 // with 3rd key sub-block.
 501
 502                 x3 = x3 + key[ik++] & 0xffff;
 503
 504                 // 4) Multiply (modulo 0x10001), 4th text sub-block
 505                 // with 4th key sub-block.
 506
 507                 x4 = (int) ((long) x4 * key[ik++] % 0x10001L & 0xffff);
 508
 509                 // 5) XOR results from steps 1 and 3.
 510
 511                 t2 = x1 ^ x3;
 512
 513                 // 6) XOR results from steps 2 and 4.
 514                 // Included in step 8.
 515
 516                 // 7) Multiply (modulo 0x10001), result of step 5
 517                 // with 5th key sub-block.
 518
 519                 t2 = (int) ((long) t2 * key[ik++] % 0x10001L & 0xffff);
 520
 521                 // 8) Add (modulo 0x10000), results of steps 6 and 7.
 522
 523                 t1 = t2 + (x2 ^ x4) & 0xffff;
 524
 525                 // 9) Multiply (modulo 0x10001), result of step 8
 526                 // with 6th key sub-block.
 527
 528                 t1 = (int) ((long) t1 * key[ik++] % 0x10001L & 0xffff);
 529
 530                 // 10) Add (modulo 0x10000), results of steps 7 and 9.
 531
 532                 t2 = t1 + t2 & 0xffff;
 533
 534                 // 11) XOR results from steps 1 and 9.
 535
 536                 x1 ^= t1;
 537
 538                 // 14) XOR results from steps 4 and 10. (Out of order).
 539
 540                 x4 ^= t2;
 541
 542                 // 13) XOR results from steps 2 and 10. (Out of order).
 543
 544                 t2 ^= x2;
 545
 546                 // 12) XOR results from steps 3 and 9. (Out of order).
 547
 548                 x2 = x3 ^ t1;
 549
 550                 x3 = t2;        // Results of x2 and x3 now swapped.
 551
 552             } while(--r != 0);  // Repeats seven more rounds.
 553
 554             // Final output transform (4 steps).
 555
 556             // 1) Multiply (modulo 0x10001), 1st text-block
 557             // with 1st key sub-block.
 558
 559             x1 = (int) ((long) x1 * key[ik++] % 0x10001L & 0xffff);
 560
 561             // 2) Add (modulo 0x10000), 2nd text sub-block
 562             // with 2nd key sub-block. It says x3, but that is to undo swap
 563             // of subblocks 2 and 3 in 8th processing round.
 564
 565             x3 = x3 + key[ik++] & 0xffff;
 566
 567             // 3) Add (modulo 0x10000), 3rd text sub-block
 568             // with 3rd key sub-block. It says x2, but that is to undo swap
 569             // of subblocks 2 and 3 in 8th processing round.
 570
 571             x2 = x2 + key[ik++] & 0xffff;
 572
 573             // 4) Multiply (modulo 0x10001), 4th text-block
 574             // with 4th key sub-block.
 575
 576             x4 = (int) ((long) x4 * key[ik++] % 0x10001L & 0xffff);
 577
 578             // Repackage from 16-bit sub-blocks to 8-bit byte array text2.
 579
 580             text2[i2++] = (byte) x1;
 581             text2[i2++] = (byte) (x1 >>> 8);
 582             text2[i2++] = (byte) x3;                // x3 and x2 are switched
 583             text2[i2++] = (byte) (x3 >>> 8);        // only in name.
 584             text2[i2++] = (byte) x2;
 585             text2[i2++] = (byte) (x2 >>> 8);
 586             text2[i2++] = (byte) x4;
 587             text2[i2++] = (byte) (x4 >>> 8);
 588
 589         }   // End for loop.
 590
 591     }   // End routine.
 592 }  // End of class