Robust/src/Benchmarks/Prefetch/Crypt/dsm/crypt/IDEATest.java

   1 /**************************************************************************
   2 *                                                                         *
   3 *         Java Grande Forum Benchmark Suite - Thread Version 1.0          *
   4 *                                                                         *
   5 *                            produced by                                  *
   6 *                                                                         *
   7 *                  Java Grande Benchmarking Project                       *
   8 *                                                                         *
   9 *                                at                                       *
  10 *                                                                         *
  11 *                Edinburgh Parallel Computing Centre                      *
  12 *                                                                         *
  13 *                email: epcc-javagrande@epcc.ed.ac.uk                     *
  14 *                                                                         *
  15 *                  Original version of this code by                       *
  16 *                 Gabriel Zachmann (zach@igd.fhg.de)                      *
  17 *                                                                         *
  18 *      This version copyright (c) The University of Edinburgh, 2001.      *
  19 *                         All rights reserved.                            *
  20 *                                                                         *
  21 **************************************************************************/
  22 /**************************************************************************
  23 *                       Ported for DSTM Benchmark                         *
  24 **************************************************************************/
  25
  26
  27 /**
  28  * Class IDEATest
  29  *
  30  * This test performs IDEA encryption then decryption. IDEA stands
  31  * for International Data Encryption Algorithm. The test is based
  32  * on code presented in Applied Cryptography by Bruce Schnier,
  33  * which was based on code developed by Xuejia Lai and James L.
  34  * Massey.
  35
  36  **/
  37
  38 //package crypt;
  39
  40 //import java.util.*;
  41 //import jgfutil.*;
  42
  43 class IDEATest
  44 {
  45
  46     // Declare class data. Byte buffer plain1 holds the original
  47     // data for encryption, crypt1 holds the encrypted data, and
  48     // plain2 holds the decrypted data, which should match plain1
  49     // byte for byte.
  50
  51     int array_rows;
  52
  53     byte [] plain1;       // Buffer for plaintext data.
  54     byte [] crypt1;       // Buffer for encrypted data.
  55     byte [] plain2;       // Buffer for decrypted data.
  56
  57     short [] userkey;     // Key for encryption/decryption.
  58     int [] Z;             // Encryption subkey (userkey derived).
  59     int [] DK;            // Decryption subkey (userkey derived).
  60
  61     void Do(int nthreads, JGFInstrumentor instr)
  62     {
  63
  64         int mid = (128<<24)|(195<<16)|(175<<8)|73;
  65
  66         IDEARunner tmp;
  67         IDEARunner[] th;
  68         atomic {
  69             th = global new IDEARunner [nthreads];
  70         }
  71
  72         // Start the stopwatch.
  73         instr.startTimer("Section2:Crypt:Kernel");
  74
  75         // Encrypt plain1.
  76         for(int i=1;i<nthreads;i++) {
  77             atomic {
  78                 th[i] = global new IDEARunner(i,plain1,crypt1,Z,nthreads);
  79                 tmp = th[i];
  80             }
  81             tmp.start(mid);
  82         }
  83
  84         atomic {
  85             th[0] = global new IDEARunner(0,plain1,crypt1,Z,nthreads);
  86             tmp = th[0];
  87         }
  88         tmp.start(mid);
  89
  90
  91         for(int i=1;i<nthreads;i++) {
  92             atomic {
  93                 tmp = th[i];
  94             }
  95             tmp.join();
  96         }
  97
  98         // Decrypt.
  99         for(int i=1;i<nthreads;i++) {
 100             atomic {
 101                 th[i] = global new IDEARunner(i,crypt1,plain2,DK,nthreads);
 102                 tmp = th[i];
 103             }
 104             tmp.start(mid);
 105         }
 106
 107         atomic {
 108             th[0] = global new IDEARunner(0,crypt1,plain2,DK,nthreads);
 109             tmp = th[0];
 110         }
 111         tmp.start(mid);
 112
 113
 114         for(int i=1;i<nthreads;i++) {
 115             atomic {
 116                 tmp = th[i];
 117             }
 118             tmp.join();
 119         }
 120
 121
 122         // Stop the stopwatch.
 123         instr.stopTimer("Section2:Crypt:Kernel");
 124
 125     }
 126
 127     //
 128     // buildTestData
 129     //Builds the data used for the test -- each time the test is run.
 130
 131
 132     void buildTestData()
 133     {
 134
 135
 136         // Create three byte arrays that will be used (and reused) for
 137         // encryption/decryption operations.
 138
 139         plain1 = new byte [array_rows];
 140         crypt1 = new byte [array_rows];
 141         plain2 = new byte [array_rows];
 142
 143
 144         Random rndnum = new Random(136506717L);  // Create random number generator.
 145
 146
 147         // Allocate three arrays to hold keys: userkey is the 128-bit key.
 148         // Z is the set of 16-bit encryption subkeys derived from userkey,
 149         // while DK is the set of 16-bit decryption subkeys also derived
 150         // from userkey. NOTE: The 16-bit values are stored here in
 151         // 32-bit int arrays so that the values may be used in calculations
 152         // as if they are unsigned. Each 64-bit block of plaintext goes
 153         // through eight processing rounds involving six of the subkeys
 154         // then a final output transform with four of the keys; (8 * 6)
 155         // + 4 = 52 subkeys.
 156
 157         userkey = new short [8];  // User key has 8 16-bit shorts.
 158         Z = new int [52];         // Encryption subkey (user key derived).
 159         DK = new int [52];        // Decryption subkey (user key derived).
 160
 161         // Generate user key randomly; eight 16-bit values in an array.
 162
 163         for (int i = 0; i < 8; i++)
 164         {
 165             // Again, the random number function returns int. Converting
 166             // to a short type preserves the bit pattern in the lower 16
 167             // bits of the int and discards the rest.
 168
 169             userkey[i] = (short) rndnum.nextInt();
 170         }
 171
 172         // Compute encryption and decryption subkeys.
 173
 174         calcEncryptKey();
 175         calcDecryptKey();
 176
 177         // Fill plain1 with "text."
 178         for (int i = 0; i < array_rows; i++)
 179         {
 180             plain1[i] = (byte) i;
 181
 182             // Converting to a byte
 183             // type preserves the bit pattern in the lower 8 bits of the
 184             // int and discards the rest.
 185         }
 186     }
 187
 188
 189     // calcEncryptKey
 190
 191     // Builds the 52 16-bit encryption subkeys Z[] from the user key and
 192     //stores in 32-bit int array. The routing corrects an error in the
 193     //source code in the Schnier book. Basically, the sense of the 7-
 194     //and 9-bit shifts are reversed. It still works reversed, but would
 195     //encrypted code would not decrypt with someone else's IDEA code.
 196     //
 197
 198     private void calcEncryptKey()
 199     {
 200         int j;                       // Utility variable.
 201
 202         for (int i = 0; i < 52; i++) // Zero out the 52-int Z array.
 203             Z[i] = 0;
 204
 205         for (int i = 0; i < 8; i++)  // First 8 subkeys are userkey itself.
 206         {
 207             Z[i] = userkey[i] & 0xffff;     // Convert "unsigned"
 208             // short to int.
 209         }
 210
 211         // Each set of 8 subkeys thereafter is derived from left rotating
 212         // the whole 128-bit key 25 bits to left (once between each set of
 213         // eight keys and then before the last four). Instead of actually
 214         // rotating the whole key, this routine just grabs the 16 bits
 215         // that are 25 bits to the right of the corresponding subkey
 216         // eight positions below the current subkey. That 16-bit extent
 217         // straddles two array members, so bits are shifted left in one
 218         // member and right (with zero fill) in the other. For the last
 219         // two subkeys in any group of eight, those 16 bits start to
 220         // wrap around to the first two members of the previous eight.
 221
 222         for (int i = 8; i < 52; i++)
 223         {
 224             int flag1 = 0;
 225             j = i % 8;
 226             if (j < 6)
 227             {
 228                 Z[i] = ((Z[i -7]>>>9) | (Z[i-6]<<7)) // Shift and combine.
 229                     & 0xFFFF;                    // Just 16 bits.
 230                 //continue;                            // Next iteration.
 231                 flag1 = 1;
 232             }
 233
 234             if(flag1 == 0) {
 235                 int flag2 = 0;
 236
 237                 if (j == 6)    // Wrap to beginning for second chunk.
 238                 {
 239                     Z[i] = ((Z[i -7]>>>9) | (Z[i-14]<<7))
 240                         & 0xFFFF;
 241                     //continue;
 242                     flag2 = 1;
 243                 }
 244
 245                 if(flag2 == 0) {
 246                     // j == 7 so wrap to beginning for both chunks.
 247                     Z[i] = ((Z[i -15]>>>9) | (Z[i-14]<<7))
 248                         & 0xFFFF;
 249                 }
 250             }
 251         }
 252     }
 253
 254     //
 255     //calcDecryptKey
 256     //
 257     //Builds the 52 16-bit encryption subkeys DK[] from the encryption-
 258     //subkeys Z[]. DK[] is a 32-bit int array holding 16-bit values as
 259     //unsigned.
 260     //
 261
 262     private void calcDecryptKey()
 263     {
 264         int j, k;                 // Index counters.
 265         int t1, t2, t3;           // Temps to hold decrypt subkeys.
 266
 267         t1 = inv(Z[0]);           // Multiplicative inverse (mod x10001).
 268         t2 = - Z[1] & 0xffff;     // Additive inverse, 2nd encrypt subkey.
 269         t3 = - Z[2] & 0xffff;     // Additive inverse, 3rd encrypt subkey.
 270
 271         DK[51] = inv(Z[3]);       // Multiplicative inverse (mod x10001).
 272         DK[50] = t3;
 273         DK[49] = t2;
 274         DK[48] = t1;
 275
 276         j = 47;                   // Indices into temp and encrypt arrays.
 277         k = 4;
 278         for (int i = 0; i < 7; i++)
 279         {
 280             t1 = Z[k++];
 281             DK[j--] = Z[k++];
 282             DK[j--] = t1;
 283             t1 = inv(Z[k++]);
 284             t2 = -Z[k++] & 0xffff;
 285             t3 = -Z[k++] & 0xffff;
 286             DK[j--] = inv(Z[k++]);
 287             DK[j--] = t2;
 288             DK[j--] = t3;
 289             DK[j--] = t1;
 290         }
 291
 292         t1 = Z[k++];
 293         DK[j--] = Z[k++];
 294         DK[j--] = t1;
 295         t1 = inv(Z[k++]);
 296         t2 = -Z[k++] & 0xffff;
 297         t3 = -Z[k++] & 0xffff;
 298         DK[j--] = inv(Z[k++]);
 299         DK[j--] = t3;
 300         DK[j--] = t2;
 301         DK[j--] = t1;
 302     }
 303
 304
 305
 306
 307
 308     //
 309     //mul
 310     //
 311     // Performs multiplication, modulo (2**16)+1. This code is structured
 312     // on the assumption that untaken branches are cheaper than taken
 313     // branches, and that the compiler doesn't schedule branches.
 314     // Java: Must work with 32-bit int and one 64-bit long to keep
 315     // 16-bit values and their products "unsigned." The routine assumes
 316     // that both a and b could fit in 16 bits even though they come in
 317     // as 32-bit ints. Lots of "& 0xFFFF" masks here to keep things 16-bit.
 318     // Also, because the routine stores mod (2**16)+1 results in a 2**16
 319     // space, the result is truncated to zero whenever the result would
 320     // zero, be 2**16. And if one of the multiplicands is 0, the result
 321     // is not zero, but (2**16) + 1 minus the other multiplicand (sort
 322     // of an additive inverse mod 0x10001).
 323
 324     // NOTE: The java conversion of this routine works correctly, but
 325     // is half the speed of using Java's modulus division function (%)
 326     // on the multiplication with a 16-bit masking of the result--running
 327     // in the Symantec Caje IDE. So it's not called for now; the test
 328     // uses Java % instead.
 329     //
 330
 331     private int mul(int a, int b)
 332     {
 333         int ret;
 334         long p;             // Large enough to catch 16-bit multiply
 335         // without hitting sign bit.
 336         if (a != 0)
 337         {
 338             if(b != 0)
 339             {
 340                 p = (long) a * b;
 341                 b = (int) p & 0xFFFF;       // Lower 16 bits.
 342                 a = (int) p >>> 16;         // Upper 16 bits.
 343                 if (b < a)
 344                     return (b - a + 1) & 0xFFFF;
 345                 else
 346                     return (b - a) & 0xFFFF;
 347             }
 348             else
 349                 return ((1 - a) & 0xFFFF);  // If b = 0, then same as
 350             // 0x10001 - a.
 351         }
 352         else                                // If a = 0, then return
 353             return((1 - b) & 0xFFFF);       // same as 0x10001 - b.
 354     }
 355
 356     //
 357     // inv
 358     //
 359     // Compute multiplicative inverse of x, modulo (2**16)+1 using
 360     // extended Euclid's GCD (greatest common divisor) algorithm.
 361     // It is unrolled twice to avoid swapping the meaning of
 362     // the registers. And some subtracts are changed to adds.
 363     // Java: Though it uses signed 32-bit ints, the interpretation
 364     // of the bits within is strictly unsigned 16-bit.
 365     //
 366
 367     private int inv(int x)
 368     {
 369         int t0, t1;
 370         int q, y;
 371
 372         if (x <= 1)             // Assumes positive x.
 373             return(x);          // 0 and 1 are self-inverse.
 374
 375         t1 = 0x10001 / x;       // (2**16+1)/x; x is >= 2, so fits 16 bits.
 376         y = 0x10001 % x;
 377         if (y == 1)
 378             return((1 - t1) & 0xFFFF);
 379
 380         t0 = 1;
 381         do {
 382             q = x / y;
 383             x = x % y;
 384             t0 += q * t1;
 385             if (x == 1) return(t0);
 386             q = y / x;
 387             y = y % x;
 388             t1 += q * t0;
 389         } while (y != 1);
 390
 391         return((1 - t1) & 0xFFFF);
 392     }
 393
 394     //
 395     // freeTestData
 396     //
 397     // Nulls arrays and forces garbage collection to free up memory.
 398     //
 399
 400     void freeTestData(int array_rows)
 401     {
 402         for(int i = 0; i<array_rows; i++) {
 403             plain1[i] = (byte) 0;
 404             crypt1[i] = (byte) 0;
 405             plain2[i] = (byte) 0;
 406         }
 407
 408         for(int i = 0; i<8; i++) {
 409             userkey[i] = (short) 0;
 410         }
 411
 412         for(int i = 0; i<52; i++) {
 413             Z[i] = 0;
 414             DK[i] = 0;
 415         }
 416
 417         //System.gc();                // Force garbage collection.
 418     }
 419
 420 }
 421
 422
 423
 424 class IDEARunner extends Thread {
 425
 426     int id,key[];
 427     byte text1[],text2[];
 428     int nthreads;
 429
 430     public IDEARunner(int id, byte [] text1, byte [] text2, int [] key, int nthreads) {
 431         this.id = id;
 432         this.text1=text1;
 433         this.text2=text2;
 434         this.key=key;
 435         this.nthreads = nthreads;
 436     }
 437     //
 438     // run()
 439     //
 440     // IDEA encryption/decryption algorithm. It processes plaintext in
 441     // 64-bit blocks, one at a time, breaking the block into four 16-bit
 442     // unsigned subblocks. It goes through eight rounds of processing
 443     // using 6 new subkeys each time, plus four for last step. The source
 444     // text is in array text1, the destination text goes into array text2
 445     // The routine represents 16-bit subblocks and subkeys as type int so
 446     // that they can be treated more easily as unsigned. Multiplication
 447     // modulo 0x10001 interprets a zero sub-block as 0x10000; it must to
 448     // fit in 16 bits.
 449     //
 450
 451     public void run() {
 452         int ilow, iupper, slice, tslice, ttslice;
 453
 454         tslice = text1.length / 8;
 455         ttslice = (tslice + nthreads-1) / nthreads;
 456         slice = ttslice*8;
 457
 458         ilow = id*slice;
 459         iupper = (id+1)*slice;
 460         if(iupper > text1.length) iupper = text1.length;
 461
 462         int i1 = ilow;                 // Index into first text array.
 463         int i2 = ilow;                 // Index into second text array.
 464         int ik;                     // Index into key array.
 465         int x1, x2, x3, x4, t1, t2; // Four "16-bit" blocks, two temps.
 466         int r;                      // Eight rounds of processing.
 467
 468         for (int i =ilow ; i <iupper ; i +=8)
 469         {
 470
 471             ik = 0;                 // Restart key index.
 472             r = 8;                  // Eight rounds of processing.
 473
 474             // Load eight plain1 bytes as four 16-bit "unsigned" integers.
 475             // Masking with 0xff prevents sign extension with cast to int.
 476
 477             x1 = text1[i1++] & 0xff;          // Build 16-bit x1 from 2 bytes,
 478             x1 |= (text1[i1++] & 0xff) << 8;  // assuming low-order byte first.
 479             x2 = text1[i1++] & 0xff;
 480             x2 |= (text1[i1++] & 0xff) << 8;
 481             x3 = text1[i1++] & 0xff;
 482             x3 |= (text1[i1++] & 0xff) << 8;
 483             x4 = text1[i1++] & 0xff;
 484             x4 |= (text1[i1++] & 0xff) << 8;
 485
 486             do {
 487                 // 1) Multiply (modulo 0x10001), 1st text sub-block
 488                 // with 1st key sub-block.
 489
 490                 x1 = (int) ((long) x1 * key[ik++] % 0x10001L & 0xffff);
 491
 492                 // 2) Add (modulo 0x10000), 2nd text sub-block
 493                 // with 2nd key sub-block.
 494
 495                 x2 = x2 + key[ik++] & 0xffff;
 496
 497                 // 3) Add (modulo 0x10000), 3rd text sub-block
 498                 // with 3rd key sub-block.
 499
 500                 x3 = x3 + key[ik++] & 0xffff;
 501
 502                 // 4) Multiply (modulo 0x10001), 4th text sub-block
 503                 // with 4th key sub-block.
 504
 505                 x4 = (int) ((long) x4 * key[ik++] % 0x10001L & 0xffff);
 506
 507                 // 5) XOR results from steps 1 and 3.
 508
 509                 t2 = x1 ^ x3;
 510
 511                 // 6) XOR results from steps 2 and 4.
 512                 // Included in step 8.
 513
 514                 // 7) Multiply (modulo 0x10001), result of step 5
 515                 // with 5th key sub-block.
 516
 517                 t2 = (int) ((long) t2 * key[ik++] % 0x10001L & 0xffff);
 518
 519                 // 8) Add (modulo 0x10000), results of steps 6 and 7.
 520
 521                 t1 = t2 + (x2 ^ x4) & 0xffff;
 522
 523                 // 9) Multiply (modulo 0x10001), result of step 8
 524                 // with 6th key sub-block.
 525
 526                 t1 = (int) ((long) t1 * key[ik++] % 0x10001L & 0xffff);
 527
 528                 // 10) Add (modulo 0x10000), results of steps 7 and 9.
 529
 530                 t2 = t1 + t2 & 0xffff;
 531
 532                 // 11) XOR results from steps 1 and 9.
 533
 534                 x1 ^= t1;
 535
 536                 // 14) XOR results from steps 4 and 10. (Out of order).
 537
 538                 x4 ^= t2;
 539
 540                 // 13) XOR results from steps 2 and 10. (Out of order).
 541
 542                 t2 ^= x2;
 543
 544                 // 12) XOR results from steps 3 and 9. (Out of order).
 545
 546                 x2 = x3 ^ t1;
 547
 548                 x3 = t2;        // Results of x2 and x3 now swapped.
 549
 550             } while(--r != 0);  // Repeats seven more rounds.
 551
 552             // Final output transform (4 steps).
 553
 554             // 1) Multiply (modulo 0x10001), 1st text-block
 555             // with 1st key sub-block.
 556
 557             x1 = (int) ((long) x1 * key[ik++] % 0x10001L & 0xffff);
 558
 559             // 2) Add (modulo 0x10000), 2nd text sub-block
 560             // with 2nd key sub-block. It says x3, but that is to undo swap
 561             // of subblocks 2 and 3 in 8th processing round.
 562
 563             x3 = x3 + key[ik++] & 0xffff;
 564
 565             // 3) Add (modulo 0x10000), 3rd text sub-block
 566             // with 3rd key sub-block. It says x2, but that is to undo swap
 567             // of subblocks 2 and 3 in 8th processing round.
 568
 569             x2 = x2 + key[ik++] & 0xffff;
 570
 571             // 4) Multiply (modulo 0x10001), 4th text-block
 572             // with 4th key sub-block.
 573
 574             x4 = (int) ((long) x4 * key[ik++] % 0x10001L & 0xffff);
 575
 576             // Repackage from 16-bit sub-blocks to 8-bit byte array text2.
 577
 578             text2[i2++] = (byte) x1;
 579             text2[i2++] = (byte) (x1 >>> 8);
 580             text2[i2++] = (byte) x3;                // x3 and x2 are switched
 581             text2[i2++] = (byte) (x3 >>> 8);        // only in name.
 582             text2[i2++] = (byte) x2;
 583             text2[i2++] = (byte) (x2 >>> 8);
 584             text2[i2++] = (byte) x4;
 585             text2[i2++] = (byte) (x4 >>> 8);
 586
 587         }   // End for loop.
 588
 589     }   // End routine.
 590 }  // End of class
 591
 592
 593
 594
 595
 596
 597
 598
 599