lib/Transforms/Scalar/InductionVars.cpp

   1 //===- InductionVars.cpp - Induction Variable Canonicalization code ---------=//
   2 //
   3 // This file implements induction variable canonicalization of loops.
   4 //
   5 // Specifically, after this executes, the following is true:
   6 //   - There is a single induction variable for each loop (at least loops that
   7 //     used to contain at least one induction variable)
   8 //   * This induction variable starts at 0 and steps by 1 per iteration
   9 //   * This induction variable is represented by the first PHI node in the
  10 //     Header block, allowing it to be found easily.
  11 //   - All other preexisting induction variables are adjusted to operate in
  12 //     terms of this primary induction variable
  13 //   - Induction variables with a step size of 0 have been eliminated.
  14 //
  15 // This code assumes the following is true to perform its full job:
  16 //   - The CFG has been simplified to not have multiple entrances into an
  17 //     interval header.  Interval headers should only have two predecessors,
  18 //     one from inside of the loop and one from outside of the loop.
  19 //
  20 //===----------------------------------------------------------------------===//
  21
  22 #include "llvm/Optimizations/InductionVars.h"
  23 #include "llvm/ConstPoolVals.h"
  24 #include "llvm/Analysis/IntervalPartition.h"
  25 #include "llvm/Assembly/Writer.h"
  26 #include "llvm/Tools/STLExtras.h"
  27 #include "llvm/SymbolTable.h"
  28 #include "llvm/iOther.h"
  29 #include "llvm/CFG.h"
  30 #include <algorithm>
  31
  32 #include "llvm/Analysis/LoopDepth.h"
  33
  34 using namespace opt;
  35
  36 // isLoopInvariant - Return true if the specified value/basic block source is
  37 // an interval invariant computation.
  38 //
  39 static bool isLoopInvariant(cfg::Interval *Int, Value *V) {
  40   assert(V->isConstant() || V->isInstruction() || V->isMethodArgument());
  41
  42   if (!V->isInstruction())
  43     return true;  // Constants and arguments are always loop invariant
  44
  45   BasicBlock *ValueBlock = ((Instruction*)V)->getParent();
  46   assert(ValueBlock && "Instruction not embedded in basic block!");
  47
  48   // For now, only consider values from outside of the interval, regardless of
  49   // whether the expression could be lifted out of the loop by some LICM.
  50   //
  51   // TODO: invoke LICM library if we find out it would be useful.
  52   //
  53   return !Int->contains(ValueBlock);
  54 }
  55
  56
  57 // isLinearInductionVariableH - Return isLIV if the expression V is a linear
  58 // expression defined in terms of loop invariant computations, and a single
  59 // instance of the PHI node PN.  Return isLIC if the expression V is a loop
  60 // invariant computation.  Return isNLIV if the expression is a negated linear
  61 // induction variable.  Return isOther if it is neither.
  62 //
  63 // Currently allowed operators are: ADD, SUB, NEG
  64 // TODO: This should allow casts!
  65 //
  66 enum LIVType { isLIV, isLIC, isNLIV, isOther };
  67 //
  68 // neg - Negate the sign of a LIV expression.
  69 inline LIVType neg(LIVType T) {
  70   assert(T == isLIV || T == isNLIV && "Negate Only works on LIV expressions");
  71   return T == isLIV ? isNLIV : isLIV;
  72 }
  73 //
  74 static LIVType isLinearInductionVariableH(cfg::Interval *Int, Value *V,
  75                                           PHINode *PN) {
  76   if (V == PN) { return isLIV; }  // PHI node references are (0+PHI)
  77   if (isLoopInvariant(Int, V)) return isLIC;
  78
  79   // loop variant computations must be instructions!
  80   Instruction *I = V->castInstructionAsserting();
  81   switch (I->getInstType()) {       // Handle each instruction seperately
  82   case Instruction::Neg: {
  83     Value *SubV = ((UnaryOperator*)I)->getOperand(0);
  84     LIVType SubLIVType = isLinearInductionVariableH(Int, SubV, PN);
  85     switch (SubLIVType) {
  86     case isLIC:          // Loop invariant & other computations remain the same
  87     case isOther: return SubLIVType;
  88     case isLIV:          // Return the opposite signed LIV type
  89     case isNLIV:  return neg(isLIV);
  90     }
  91   }
  92   case Instruction::Add:
  93   case Instruction::Sub: {
  94     Value *SubV1 = ((BinaryOperator*)I)->getOperand(0);
  95     Value *SubV2 = ((BinaryOperator*)I)->getOperand(1);
  96     LIVType SubLIVType1 = isLinearInductionVariableH(Int, SubV1, PN);
  97     if (SubLIVType1 == isOther) return isOther;  // Early bailout
  98     LIVType SubLIVType2 = isLinearInductionVariableH(Int, SubV2, PN);
  99
 100     switch (SubLIVType2) {
 101     case isOther: return isOther;      // Unknown subexpression type
 102     case isLIC:   return SubLIVType1;  // Constant offset, return type #1
 103     case isLIV:
 104     case isNLIV:
 105       // So now we know that we have a linear induction variable on the RHS of
 106       // the ADD or SUB instruction.  SubLIVType1 cannot be isOther, so it is
 107       // either a Loop Invariant computation, or a LIV type.
 108       if (SubLIVType1 == isLIC) {
 109         // Loop invariant computation, we know this is a LIV then.
 110         return (I->getInstType() == Instruction::Add) ?
 111                        SubLIVType2 : neg(SubLIVType2);
 112       }
 113
 114       // If the LHS is also a LIV Expression, we cannot add two LIVs together
 115       if (I->getInstType() == Instruction::Add) return isOther;
 116
 117       // We can only subtract two LIVs if they are the same type, which yields
 118       // a LIC, because the LIVs cancel each other out.
 119       return (SubLIVType1 == SubLIVType2) ? isLIC : isOther;
 120     }
 121     // NOT REACHED
 122   }
 123
 124   default:            // Any other instruction is not a LINEAR induction var
 125     return isOther;
 126   }
 127 }
 128
 129 // isLinearInductionVariable - Return true if the specified expression is a
 130 // "linear induction variable", which is an expression involving a single
 131 // instance of the PHI node and a loop invariant value that is added or
 132 // subtracted to the PHI node.  This is calculated by walking the SSA graph
 133 //
 134 static inline bool isLinearInductionVariable(cfg::Interval *Int, Value *V,
 135                                              PHINode *PN) {
 136   return isLinearInductionVariableH(Int, V, PN) == isLIV;
 137 }
 138
 139
 140 // isSimpleInductionVar - Return true iff the cannonical induction variable PN
 141 // has an initializer of the constant value 0, and has a step size of constant
 142 // 1.
 143 static inline bool isSimpleInductionVar(PHINode *PN) {
 144   assert(PN->getNumIncomingValues() == 2 && "Must have cannonical PHI node!");
 145   Value *Initializer = PN->getIncomingValue(0);
 146   if (!Initializer->isConstant()) return false;
 147
 148   if (Initializer->getType()->isSigned()) {  // Signed constant value...
 149     if (((ConstPoolSInt*)Initializer)->getValue() != 0) return false;
 150   } else if (Initializer->getType()->isUnsigned()) {  // Unsigned constant value
 151     if (((ConstPoolUInt*)Initializer)->getValue() != 0) return false;
 152   } else {
 153     return false;   // Not signed or unsigned?  Must be FP type or something
 154   }
 155
 156   Value *StepExpr = PN->getIncomingValue(1);
 157   if (!StepExpr->isInstruction() ||
 158       ((Instruction*)StepExpr)->getInstType() != Instruction::Add)
 159     return false;
 160
 161   BinaryOperator *I = (BinaryOperator*)StepExpr;
 162   assert(I->getOperand(0)->isInstruction() &&
 163       ((Instruction*)I->getOperand(0))->isPHINode() &&
 164          "PHI node should be first operand of ADD instruction!");
 165
 166   // Get the right hand side of the ADD node.  See if it is a constant 1.
 167   Value *StepSize = I->getOperand(1);
 168   if (!StepSize->isConstant()) return false;
 169
 170   if (StepSize->getType()->isSigned()) {  // Signed constant value...
 171     if (((ConstPoolSInt*)StepSize)->getValue() != 1) return false;
 172   } else if (StepSize->getType()->isUnsigned()) {  // Unsigned constant value
 173     if (((ConstPoolUInt*)StepSize)->getValue() != 1) return false;
 174   } else {
 175     return false;   // Not signed or unsigned?  Must be FP type or something
 176   }
 177
 178   // At this point, we know the initializer is a constant value 0 and the step
 179   // size is a constant value 1.  This is our simple induction variable!
 180   return true;
 181 }
 182
 183 // InjectSimpleInductionVariable - Insert a cannonical induction variable into
 184 // the interval header Header.  This assumes that the flow graph is in
 185 // simplified form (so we know that the header block has exactly 2 predecessors)
 186 //
 187 // TODO: This should inherit the largest type that is being used by the already
 188 // present induction variables (instead of always using uint)
 189 //
 190 static PHINode *InjectSimpleInductionVariable(cfg::Interval *Int) {
 191   string PHIName, AddName;
 192
 193   BasicBlock *Header = Int->getHeaderNode();
 194   Method *M = Header->getParent();
 195
 196   if (M->hasSymbolTable()) {
 197     // Only name the induction variable if the method isn't stripped.
 198     PHIName = M->getSymbolTable()->getUniqueName(Type::UIntTy, "ind_var");
 199     AddName = M->getSymbolTable()->getUniqueName(Type::UIntTy, "ind_var_next");
 200   }
 201
 202   // Create the neccesary instructions...
 203   PHINode        *PN      = new PHINode(Type::UIntTy, PHIName);
 204   ConstPoolVal   *One     = new ConstPoolUInt(Type::UIntTy, 1);
 205   ConstPoolVal   *Zero    = new ConstPoolUInt(Type::UIntTy, 0);
 206   BinaryOperator *AddNode = BinaryOperator::create(Instruction::Add,
 207                                                    PN, One, AddName);
 208
 209   // Figure out which predecessors I have to play with... there should be
 210   // exactly two... one of which is a loop predecessor, and one of which is not.
 211   //
 212   cfg::pred_iterator PI = cfg::pred_begin(Header);
 213   assert(PI != cfg::pred_end(Header) && "Header node should have 2 preds!");
 214   BasicBlock *Pred1 = *PI; ++PI;
 215   assert(PI != cfg::pred_end(Header) && "Header node should have 2 preds!");
 216   BasicBlock *Pred2 = *PI;
 217   assert(++PI == cfg::pred_end(Header) && "Header node should have 2 preds!");
 218
 219   // Make Pred1 be the loop entrance predecessor, Pred2 be the Loop predecessor
 220   if (Int->contains(Pred1)) swap(Pred1, Pred2);
 221
 222   assert(!Int->contains(Pred1) && "Pred1 should be loop entrance!");
 223   assert( Int->contains(Pred2) && "Pred2 should be looping edge!");
 224
 225   // Link the instructions into the PHI node...
 226   PN->addIncoming(Zero, Pred1);     // The initializer is first argument
 227   PN->addIncoming(AddNode, Pred2);  // The step size is second PHI argument
 228
 229   // Insert the PHI node into the Header of the loop.  It shall be the first
 230   // instruction, because the "Simple" Induction Variable must be first in the
 231   // block.
 232   //
 233   BasicBlock::InstListType &IL = Header->getInstList();
 234   IL.push_front(PN);
 235
 236   // Insert the Add instruction as the first (non-phi) instruction in the
 237   // header node's basic block.
 238   BasicBlock::iterator I = IL.begin();
 239   while ((*I)->isPHINode()) ++I;
 240   IL.insert(I, AddNode);
 241
 242   // Insert the constants into the constant pool for the method...
 243   M->getConstantPool().insert(One);
 244   M->getConstantPool().insert(Zero);
 245   return PN;
 246 }
 247
 248 // ProcessInterval - This function is invoked once for each interval in the
 249 // IntervalPartition of the program.  It looks for auxilliary induction
 250 // variables in loops.  If it finds one, it:
 251 // * Cannonicalizes the induction variable.  This consists of:
 252 //   A. Making the first element of the PHI node be the loop invariant
 253 //      computation, and the second element be the linear induction portion.
 254 //   B. Changing the first element of the linear induction portion of the PHI
 255 //      node to be of the form ADD(PHI, <loop invariant expr>).
 256 // * Add the induction variable PHI to a list of induction variables found.
 257 //
 258 // After this, a list of cannonical induction variables is known.  This list
 259 // is searched to see if there is an induction variable that counts from
 260 // constant 0 with a step size of constant 1.  If there is not one, one is
 261 // injected into the loop.  Thus a "simple" induction variable is always known
 262 //
 263 // One a simple induction variable is known, all other induction variables are
 264 // modified to refer to the "simple" induction variable.
 265 //
 266 static bool ProcessInterval(cfg::Interval *Int) {
 267   if (!Int->isLoop()) return false;  // Not a loop?  Ignore it!
 268
 269   vector<PHINode *> InductionVars;
 270
 271   BasicBlock *Header = Int->getHeaderNode();
 272   // Loop over all of the PHI nodes in the interval header...
 273   for (BasicBlock::iterator I = Header->begin(), E = Header->end();
 274        I != E && (*I)->isPHINode(); ++I) {
 275     PHINode *PN = (PHINode*)*I;
 276     if (PN->getNumIncomingValues() != 2) { // These should be eliminated by now.
 277       cerr << "Found interval header with more than 2 predecessors! Ignoring\n";
 278       return false;    // Todo, make an assertion.
 279     }
 280
 281     // For this to be an induction variable, one of the arguments must be a
 282     // loop invariant expression, and the other must be an expression involving
 283     // the PHI node, along with possible additions and subtractions of loop
 284     // invariant values.
 285     //
 286     BasicBlock *BB1 = PN->getIncomingBlock(0);
 287     Value      *V1  = PN->getIncomingValue(0);
 288     BasicBlock *BB2 = PN->getIncomingBlock(1);
 289     Value      *V2  = PN->getIncomingValue(1);
 290
 291     // Figure out which computation is loop invariant...
 292     if (!isLoopInvariant(Int, V1)) {
 293       // V1 is *not* loop invariant.  Check to see if V2 is:
 294       if (isLoopInvariant(Int, V2)) {
 295         // They *are* loop invariant.  Exchange BB1/BB2 and V1/V2 so that
 296         // V1 is always the loop invariant computation.
 297         swap(V1, V2); swap(BB1, BB2);
 298       } else {
 299         // Neither value is loop invariant.  Must not be an induction variable.
 300         // This case can happen if there is an unreachable loop in the CFG that
 301         // has two tail loops in it that was not split by the cleanup phase
 302         // before.
 303         continue;
 304       }
 305     }
 306
 307     // At this point, we know that BB1/V1 are loop invariant.  We don't know
 308     // anything about BB2/V2.  Check now to see if V2 is a linear induction
 309     // variable.
 310     //
 311     cerr << "Found loop invariant computation: " << V1 << endl;
 312
 313     if (!isLinearInductionVariable(Int, V2, PN))
 314       continue;         // No, it is not a linear ind var, ignore the PHI node.
 315     cerr << "Found linear induction variable: " << V2;
 316
 317     // TODO: Cannonicalize V2
 318
 319     // Add this PHI node to the list of induction variables found...
 320     InductionVars.push_back(PN);
 321   }
 322
 323   // No induction variables found?
 324   if (InductionVars.empty()) return false;
 325
 326   // Search to see if there is already a "simple" induction variable.
 327   vector<PHINode*>::iterator It =
 328     find_if(InductionVars.begin(), InductionVars.end(), isSimpleInductionVar);
 329
 330   PHINode *PrimaryIndVar;
 331
 332   // A simple induction variable was not found, inject one now...
 333   if (It == InductionVars.end()) {
 334     PrimaryIndVar = InjectSimpleInductionVariable(Int);
 335   } else {
 336     // Move the PHI node for this induction variable to the start of the PHI
 337     // list in HeaderNode... we do not need to do this for the inserted case
 338     // because the inserted node will always be placed at the beginning of
 339     // HeaderNode.
 340     //
 341     PrimaryIndVar = *It;
 342     BasicBlock::iterator i =
 343       find(Header->begin(), Header->end(), PrimaryIndVar);
 344     assert(i != Header->end() &&
 345            "How could Primary IndVar not be in the header!?!!?");
 346
 347     if (i != Header->begin())
 348       iter_swap(i, Header->begin());
 349   }
 350
 351   // Now we know that there is a simple induction variable PrimaryIndVar.
 352   // Simplify all of the other induction variables to use this induction
 353   // variable as their counter, and destroy the PHI nodes that correspond to
 354   // the old indvars.
 355   //
 356   // TODO
 357
 358
 359   cerr << "Found Interval Header with indvars (primary indvar should be first "
 360        << "phi): \n" << Header << "\nPrimaryIndVar: " << PrimaryIndVar;
 361
 362   return false;  // TODO: true;
 363 }
 364
 365
 366 // ProcessIntervalPartition - This function loops over the interval partition
 367 // processing each interval with ProcessInterval
 368 //
 369 static bool ProcessIntervalPartition(cfg::IntervalPartition &IP) {
 370   // This currently just prints out information about the interval structure
 371   // of the method...
 372 #if 0
 373   static unsigned N = 0;
 374   cerr << "\n***********Interval Partition #" << (++N) << "************\n\n";
 375   copy(IP.begin(), IP.end(), ostream_iterator<cfg::Interval*>(cerr, "\n"));
 376
 377   cerr << "\n*********** PERFORMING WORK ************\n\n";
 378 #endif
 379   // Loop over all of the intervals in the partition and look for induction
 380   // variables in intervals that represent loops.
 381   //
 382   return reduce_apply(IP.begin(), IP.end(), bitwise_or<bool>(), false,
 383                       ptr_fun(ProcessInterval));
 384 }
 385
 386 // DoInductionVariableCannonicalize - Simplify induction variables in loops.
 387 // This function loops over an interval partition of a program, reducing it
 388 // until the graph is gone.
 389 //
 390 bool opt::DoInductionVariableCannonicalize(Method *M) {
 391   // TODO: REMOVE
 392   if (0) {   // Print basic blocks with their depth
 393     LoopDepthCalculator LDC(M);
 394     for (Method::iterator I = M->begin(); I != M->end(); ++I) {
 395       cerr << "Basic Block Depth: " << LDC.getLoopDepth(*I) << *I;
 396     }
 397   }
 398
 399
 400   cfg::IntervalPartition *IP = new cfg::IntervalPartition(M);
 401   bool Changed = false;
 402
 403   while (!IP->isDegeneratePartition()) {
 404     Changed |= ProcessIntervalPartition(*IP);
 405
 406     // Calculate the reduced version of this graph until we get to an
 407     // irreducible graph or a degenerate graph...
 408     //
 409     cfg::IntervalPartition *NewIP = new cfg::IntervalPartition(*IP, false);
 410     if (NewIP->size() == IP->size()) {
 411       cerr << "IRREDUCIBLE GRAPH FOUND!!!\n";
 412       return Changed;
 413     }
 414     delete IP;
 415     IP = NewIP;
 416   }
 417
 418   delete IP;
 419   return Changed;
 420 }