Cache results of operator* (dereference each predecessor iterator once into a local variable)

[oota-llvm.git] / lib / Transforms / Scalar / GVN.cpp
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp

index f015def98f0233fd309a60aab85b6a8a14d47003..88b67768fa5dfbb8869d3b1bb40cda534e970b4f 100644 (file)
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -35,6 +35,7 @@
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  #include "llvm/Analysis/PHITransAddr.h"
@@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) {
    e.function = C->getCalledFunction();
    e.opcode = Expression::CALL;
  
-  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+  CallSite CS(C);
+  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
         I != E; ++I)
      e.varargs.push_back(lookup_or_add(*I));
  
@@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
      if (local_dep.isDef()) {
        CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
  
-      if (local_cdep->getNumOperands() != C->getNumOperands()) {
+      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
  
-      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-        uint32_t c_vn = lookup_or_add(C->getOperand(i));
-        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
          if (c_vn != cd_vn) {
            valueNumbering[C] = nextValueNumber;
            return nextValueNumber++;
@@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
        return nextValueNumber++;
      }
  
-    if (cdep->getNumOperands() != C->getNumOperands()) {
+    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
-    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-      uint32_t c_vn = lookup_or_add(C->getOperand(i));
-      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
@@ -662,11 +664,10 @@ namespace {
      bool runOnFunction(Function &F);
    public:
      static char ID; // Pass identification, replacement for typeid
-    explicit GVN(bool nopre = false, bool noloads = false)
-      : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
+    explicit GVN(bool noloads = false)
+      : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
  
    private:
-    bool NoPRE;
      bool NoLoads;
      MemoryDependenceAnalysis *MD;
      DominatorTree *DT;
@@ -711,8 +712,8 @@ namespace {
  }
  
  // createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
-  return new GVN(NoPRE, NoLoads);
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+  return new GVN(NoLoads);
  }
  
  static RegisterPass<GVN> X("gvn",
@@ -869,7 +870,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
    
    const Type *StoredValTy = StoredVal->getType();
    
-  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
    uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
    
    // If the store and reload are the same size, we can always reuse it.
@@ -1005,18 +1006,18 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
    
    // If the load and store are to the exact same address, they should have been
    // a must alias.  AA must have gotten confused.
-  // FIXME: Study to see if/when this happens.
-  if (LoadOffset == StoreOffset) {
+  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
+  // to a load from the base of the memset.
  #if 0
+  if (LoadOffset == StoreOffset) {
      dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
      << "Base       = " << *StoreBase << "\n"
      << "Store Ptr  = " << *WritePtr << "\n"
      << "Store Offs = " << StoreOffset << "\n"
      << "Load Ptr   = " << *LoadPtr << "\n";
      abort();
-#endif
-    return -1;
    }
+#endif
    
    // If the load and store don't overlap at all, the store doesn't provide
    // anything to the load.  In this case, they really don't alias at all, AA
@@ -1032,11 +1033,11 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
    
    
    bool isAAFailure = false;
-  if (StoreOffset < LoadOffset) {
+  if (StoreOffset < LoadOffset)
      isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
-  } else {
+  else
      isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
-  }
+
    if (isAAFailure) {
  #if 0
      dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
@@ -1133,8 +1134,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                     Instruction *InsertPt, const TargetData &TD){
    LLVMContext &Ctx = SrcVal->getType()->getContext();
    
-  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
-  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
    
    IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
    
@@ -1218,7 +1219,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
    return ConstantFoldLoadFromConstPtr(Src, &TD);
  }
  
-
+namespace {
  
  struct AvailableValueInBlock {
    /// BB - The basic block in question.
@@ -1292,6 +1293,8 @@ struct AvailableValueInBlock {
    }
  };
  
+}
+
  /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
  /// construct SSA form, allowing us to eliminate LI.  This returns the value
  /// that should be used at LI's definition site.
@@ -1334,8 +1337,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
    return V;
  }
  
-static bool isLifetimeStart(Instruction *Inst) {
-  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+static bool isLifetimeStart(const Instruction *Inst) {
+  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
      return II->getIntrinsicID() == Intrinsic::lifetime_start;
    return false;
  }
@@ -1499,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
        MD->invalidateCachedPointerInfo(V);
      VN.erase(LI);
      toErase.push_back(LI);
-    NumGVNLoad++;
+    ++NumGVNLoad;
      return true;
    }
  
@@ -1543,11 +1546,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    // at least one of the values is LI.  Since this means that we won't be able
    // to eliminate LI even if we insert uses in the other predecessors, we will
    // end up increasing code size.  Reject this by scanning for LI.
-  if (!EnableFullLoadPRE) {
-    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (ValuesPerBlock[i].isSimpleValue() &&
-          ValuesPerBlock[i].getSimpleValue() == LI)
+  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+    if (ValuesPerBlock[i].isSimpleValue() &&
+        ValuesPerBlock[i].getSimpleValue() == LI) {
+      // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
+      if (!EnableFullLoadPRE || e == 1)
          return false;
+    }
    }
  
    // FIXME: It is extremely unclear what this loop is doing, other than
@@ -1581,6 +1586,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
    for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
      FullyAvailableBlocks[UnavailableBlocks[i]] = false;
  
+  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
    for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
         PI != E; ++PI) {
      BasicBlock *Pred = *PI;
@@ -1596,10 +1602,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
          return false;
        }
        unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
-      toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
-      return false;
+      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
      }
    }
+  if (!NeedToSplit.empty()) {
+    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
+    return false;
+  }
  
    // Decide whether PRE is profitable for this load.
    unsigned NumUnavailablePreds = PredLoads.size();
@@ -1633,13 +1642,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
        LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                    *DT, NewInsts);
      } else {
-      Address.PHITranslateValue(LoadBB, UnavailablePred);
+      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
        LoadPtr = Address.getAddr();
-    
-      // Make sure the value is live in the predecessor.
-      if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
-        if (!DT->dominates(Inst->getParent(), UnavailablePred))
-          LoadPtr = 0;
      }
  
      // If we couldn't find or insert a computation of this phi translated value,
@@ -1707,6 +1711,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
      // Add the newly created load.
      ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                          NewLoad));
+    MD->invalidateCachedPointerInfo(LoadPtr);
+    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
    }
  
    // Perform PHI construction.
@@ -1719,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
-  NumPRELoad++;
+  ++NumPRELoad;
    return true;
  }
  
@@ -1780,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
          MD->invalidateCachedPointerInfo(AvailVal);
        VN.erase(L);
        toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
        return true;
      }
          
@@ -1826,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
        MD->invalidateCachedPointerInfo(StoredVal);
      VN.erase(L);
      toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
      return true;
    }
  
@@ -1856,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
        MD->invalidateCachedPointerInfo(DepLI);
      VN.erase(L);
      toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
      return true;
    }
  
@@ -1867,7 +1873,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
      return true;
    }
    
@@ -1878,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
        L->replaceAllUsesWith(UndefValue::get(L->getType()));
        VN.erase(L);
        toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
        return true;
      }
    }
@@ -2010,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) {
      BasicBlock *BB = FI;
      ++FI;
      bool removedBlock = MergeBlockIntoPredecessor(BB, this);
-    if (removedBlock) NumGVNBlocks++;
+    if (removedBlock) ++NumGVNBlocks;
  
      Changed |= removedBlock;
    }
@@ -2122,27 +2128,28 @@ bool GVN::performPRE(Function &F) {
  
        for (pred_iterator PI = pred_begin(CurrentBlock),
             PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
          // We're not interested in PRE where the block is its
          // own predecessor, or in blocks with predecessors
          // that are not reachable.
-        if (*PI == CurrentBlock) {
+        if (P == CurrentBlock) {
            NumWithout = 2;
            break;
-        } else if (!localAvail.count(*PI))  {
+        } else if (!localAvail.count(P))  {
            NumWithout = 2;
            break;
          }
  
          DenseMap<uint32_t, Value*>::iterator predV =
-                                            localAvail[*PI]->table.find(ValNo);
-        if (predV == localAvail[*PI]->table.end()) {
-          PREPred = *PI;
-          NumWithout++;
+                                            localAvail[P]->table.find(ValNo);
+        if (predV == localAvail[P]->table.end()) {
+          PREPred = P;
+          ++NumWithout;
          } else if (predV->second == CurInst) {
            NumWithout = 2;
          } else {
-          predMap[*PI] = predV->second;
-          NumWith++;
+          predMap[P] = predV->second;
+          ++NumWith;
          }
        }
  
@@ -2197,7 +2204,7 @@ bool GVN::performPRE(Function &F) {
        PREInstr->setName(CurInst->getName() + ".pre");
        predMap[PREPred] = PREInstr;
        VN.add(PREInstr, ValNo);
-      NumGVNPRE++;
+      ++NumGVNPRE;
  
        // Update the availability map to include the new instruction.
        localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
@@ -2207,8 +2214,10 @@ bool GVN::performPRE(Function &F) {
                                       CurInst->getName() + ".pre-phi",
                                       CurrentBlock->begin());
        for (pred_iterator PI = pred_begin(CurrentBlock),
-           PE = pred_end(CurrentBlock); PI != PE; ++PI)
-        Phi->addIncoming(predMap[*PI], *PI);
+           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
+        Phi->addIncoming(predMap[P], P);
+      }
  
        VN.add(Phi, ValNo);
        localAvail[CurrentBlock]->table[ValNo] = Phi;
@@ -2241,7 +2250,7 @@ bool GVN::splitCriticalEdges() {
      std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
      SplitCriticalEdge(Edge.first, Edge.second, this);
    } while (!toSplit.empty());
-  MD->invalidateCachedPredecessors();
+  if (MD) MD->invalidateCachedPredecessors();
    return true;
  }