refactor a blob of code out to a new 'FoldOrOfFCmps' function and

[oota-llvm.git] / lib / Transforms / Scalar / MemCpyOptimizer.cpp
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp

index f990ba870ec162645e3ff8ac10ea02eaefdf5e30..8937e1c86334c2274c0b821d7ffcc6ebced64c7e 100644 (file)
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,23 +14,14 @@
  
  #define DEBUG_TYPE "memcpyopt"
  #include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
  #include "llvm/IntrinsicInst.h"
  #include "llvm/Instructions.h"
-#include "llvm/ParameterAttributes.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Target/TargetData.h"
@@ -40,19 +31,12 @@ using namespace llvm;
  STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
  STATISTIC(NumMemSetInfer, "Number of memsets inferred");
  
-namespace {
-  cl::opt<bool>
-  FormMemSet("form-memset-from-stores",
-             cl::desc("Transform straight-line stores to memsets"),
-             cl::init(true), cl::Hidden);
-}
-
  /// isBytewiseValue - If the specified value can be set by repeating the same
  /// byte in memory, return the i8 value that it is represented with.  This is
  /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
  /// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
  /// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
+static Value *isBytewiseValue(Value *V, LLVMContext& Context) {
    // All byte-wide stores are splatable, even of arbitrary variables.
    if (V->getType() == Type::Int8Ty) return V;
    
@@ -60,9 +44,9 @@ static Value *isBytewiseValue(Value *V) {
    // corresponding integer value is "byteable".  An important case is 0.0. 
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
      if (CFP->getType() == Type::FloatTy)
-      V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
+      V = Context.getConstantExprBitCast(CFP, Type::Int32Ty);
      if (CFP->getType() == Type::DoubleTy)
-      V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
+      V = Context.getConstantExprBitCast(CFP, Type::Int64Ty);
      // Don't handle long double formats, which have strange constraints.
    }
    
@@ -85,7 +69,7 @@ static Value *isBytewiseValue(Value *V) {
          if (Val != Val2)
            return 0;
        }
-      return ConstantInt::get(Val);
+      return Context.getConstantInt(Val);
      }
    }
    
@@ -121,7 +105,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
      
      // Otherwise, we have a sequential type like an array or vector.  Multiply
      // the index by the ElementSize.
-    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += Size*OpC->getSExtValue();
    }
  
@@ -294,7 +278,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
    // End.
    if (End > I->End) {
      I->End = End;
-    range_iterator NextI = I;;
+    range_iterator NextI = I;
      while (++NextI != E && End >= NextI->Start) {
        // Merge the range in.
        I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
@@ -316,7 +300,7 @@ namespace {
      bool runOnFunction(Function &F);
    public:
      static char ID; // Pass identification, replacement for typeid
-    MemCpyOpt() : FunctionPass((intptr_t)&ID) { }
+    MemCpyOpt() : FunctionPass(&ID) {}
  
    private:
      // This transformation requires dominator postdominator info
@@ -332,13 +316,9 @@ namespace {
      }
    
      // Helper fuctions
-    bool processInstruction(Instruction* I,
-                            SmallVectorImpl<Instruction*> &toErase);
-    bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
-    bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
-                       SmallVectorImpl<Instruction*> &toErase);
-    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
-                              SmallVectorImpl<Instruction*> &toErase);
+    bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
+    bool processMemCpy(MemCpyInst* M);
+    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
      bool iterateOnFunction(Function &F);
    };
    
@@ -357,8 +337,7 @@ static RegisterPass<MemCpyOpt> X("memcpyopt",
  /// some other patterns to fold away.  In particular, this looks for stores to
  /// neighboring locations of memory.  If it sees enough consequtive ones
  /// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
-  if (!FormMemSet) return false;
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
    if (SI->isVolatile()) return false;
    
    // There are two cases that are interesting for this code to handle: memcpy
@@ -367,7 +346,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toEra
    // Ensure that the value being stored is something that can be memset'able a
    // byte at a time like "0" or "-1" or any width, as well as things like
    // 0xA0A0A0A0 and 0.0.
-  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0), SI->getContext());
    if (!ByteVal)
      return false;
  
@@ -406,7 +385,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toEra
      if (NextStore->isVolatile()) break;
      
      // Check to see if this stored value is of the same byte-splattable value.
-    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0), 
+                                   NextStore->getContext()))
        break;
  
      // Check to see if this store is to a constant offset from the start ptr.
@@ -449,23 +429,28 @@ bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toEra
      // instruction needed by the start of the block.
      BasicBlock::iterator InsertPt = BI;
    
-    if (MemSetF == 0)
+    if (MemSetF == 0) {
+      const Type *Tys[] = {Type::Int64Ty};
        MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
-                                          ->getParent(), Intrinsic::memset_i64);
+                                          ->getParent(), Intrinsic::memset,
+                                          Tys, 1);
+   }
      
      // Get the starting pointer of the block.
      StartPtr = Range.StartPtr;
    
      // Cast the start ptr to be i8* as memset requires.
-    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+    const Type *i8Ptr = SI->getContext().getPointerTypeUnqual(Type::Int8Ty);
      if (StartPtr->getType() != i8Ptr)
        StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
                                   InsertPt);
    
      Value *Ops[] = {
        StartPtr, ByteVal,   // Start, value
-      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size
-      ConstantInt::get(Type::Int32Ty, Range.Alignment)   // align
+      // size
+      SI->getContext().getConstantInt(Type::Int64Ty, Range.End-Range.Start),
+      // align
+      SI->getContext().getConstantInt(Type::Int32Ty, Range.Alignment)
      };
      Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
      DEBUG(cerr << "Replace stores:\n";
@@ -473,8 +458,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toEra
              cerr << *Range.TheStores[i];
            cerr << "With: " << *C); C=C;
    
+    // Don't invalidate the iterator
+    BBI = BI;
+  
      // Zap all the stores.
-    toErase.append(Range.TheStores.begin(), Range.TheStores.end());
+    for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+         SE = Range.TheStores.end(); SI != SE; ++SI)
+      (*SI)->eraseFromParent();
      ++NumMemSetInfer;
      MadeChange = true;
    }
@@ -486,8 +476,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toEra
  /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
  /// and checks for the possibility of a call slot optimization by having
  /// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
-                               SmallVectorImpl<Instruction*> &toErase) {
+bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
    // The general transformation to keep in mind is
    //
    //   call @func(..., src, ...)
@@ -526,7 +515,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
    if (!srcArraySize)
      return false;
  
-  uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
+  uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
      srcArraySize->getZExtValue();
  
    if (cpyLength->getZExtValue() < srcSize)
@@ -541,7 +530,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
      if (!destArraySize)
        return false;
  
-    uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
+    uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
        destArraySize->getZExtValue();
  
      if (destSize < srcSize)
@@ -553,7 +542,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
        return false;
  
      const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
-    uint64_t destSize = TD.getABITypeSize(StructTy);
+    uint64_t destSize = TD.getTypeAllocSize(StructTy);
  
      if (destSize < srcSize)
        return false;
@@ -571,10 +560,17 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
      User* UI = srcUseList.back();
      srcUseList.pop_back();
  
-    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+    if (isa<BitCastInst>(UI)) {
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          srcUseList.push_back(*I);
+    } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+      if (G->hasAllZeroIndices())
+        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+             I != E; ++I)
+          srcUseList.push_back(*I);
+      else
+        return false;
      } else if (UI != C && UI != cpy) {
        return false;
      }
@@ -597,22 +593,32 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
      return false;
  
    // All the checks have passed, so do the transformation.
+  bool changedArgument = false;
    for (unsigned i = 0; i < CS.arg_size(); ++i)
-    if (CS.getArgument(i) == cpySrc) {
+    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
        if (cpySrc->getType() != cpyDest->getType())
-        cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
+        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                                cpyDest->getName(), C);
-      CS.setArgument(i, cpyDest);
+      changedArgument = true;
+      if (CS.getArgument(i)->getType() != cpyDest->getType())
+        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, 
+                       CS.getArgument(i)->getType(), cpyDest->getName(), C));
+      else
+        CS.setArgument(i, cpyDest);
      }
  
+  if (!changedArgument)
+    return false;
+
    // Drop any cached information about the call, because we may have changed
    // its dependence information by changing its parameter.
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  MD.dropInstruction(C);
+  MD.removeInstruction(C);
  
    // Remove the memcpy
    MD.removeInstruction(cpy);
-  toErase.push_back(cpy);
+  cpy->eraseFromParent();
+  NumMemCpyInstr++;
  
    return true;
  }
@@ -621,8 +627,23 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
  /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
  /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
  ///  This allows later passes to remove the first memcpy altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
-                        SmallVectorImpl<Instruction*> &toErase) {
+bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundance for DSE
+  //   b) call-memcpy xform for return slot optimization
+  MemDepResult dep = MD.getDependency(M);
+  if (!dep.isClobber())
+    return false;
+  if (!isa<MemCpyInst>(dep.getInst())) {
+    if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+      return performCallSlotOptzn(M, C);
+    return false;
+  }
+  
+  MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
+  
    // We can only transforms memcpy's where the dest of one is the source of the
    // other
    if (M->getSource() != MDep->getDest())
@@ -655,54 +676,32 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
      return false;
    
    // If all checks passed, then we can transform these memcpy's
+  const Type *Tys[1];
+  Tys[0] = M->getLength()->getType();
    Function* MemCpyFun = Intrinsic::getDeclaration(
                                   M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID());
+                                 M->getIntrinsicID(), Tys, 1);
      
-  std::vector<Value*> args;
-  args.push_back(M->getRawDest());
-  args.push_back(MDep->getRawSource());
-  args.push_back(M->getLength());
-  args.push_back(M->getAlignment());
+  Value *Args[4] = {
+    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+  };
    
-  CallInst* C = CallInst::Create(MemCpyFun, args.begin(), args.end(), "", M);
+  CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
    
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  if (MD.getDependency(C) == MDep) {
-    MD.dropInstruction(M);
-    toErase.push_back(M);
+  
+  // If C and M don't interfere, then this is a valid transformation.  If they
+  // did, this would mean that the two sources overlap, which would be bad.
+  if (MD.getDependency(C) == dep) {
+    MD.removeInstruction(M);
+    M->eraseFromParent();
+    NumMemCpyInstr++;
      return true;
    }
    
+  // Otherwise, there was no point in doing this, so we remove the call we
+  // inserted and act like nothing happened.
    MD.removeInstruction(C);
-  toErase.push_back(C);
-  return false;
-}
-
-/// processInstruction - When calculating availability, handle an instruction
-/// by inserting it into the appropriate sets
-bool MemCpyOpt::processInstruction(Instruction *I,
-                                   SmallVectorImpl<Instruction*> &toErase) {
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return processStore(SI, toErase);
-  
-  if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
-    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-
-    // The are two possible optimizations we can do for memcpy:
-    //   a) memcpy-memcpy xform which exposes redundance for DSE
-    //   b) call-memcpy xform for return slot optimization
-    Instruction* dep = MD.getDependency(M);
-    if (dep == MemoryDependenceAnalysis::None ||
-        dep == MemoryDependenceAnalysis::NonLocal)
-      return false;
-    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
-      return processMemCpy(M, MemCpy, toErase);
-    if (CallInst* C = dyn_cast<CallInst>(dep))
-      return performCallSlotOptzn(M, C, toErase);
-    return false;
-  }
-  
+  C->eraseFromParent();
    return false;
  }
  
@@ -726,42 +725,19 @@ bool MemCpyOpt::runOnFunction(Function& F) {
  // MemCpyOpt::iterateOnFunction - Executes one iteration of GVN
  bool MemCpyOpt::iterateOnFunction(Function &F) {
    bool changed_function = false;
-  
-  DominatorTree &DT = getAnalysis<DominatorTree>();   
-  
-  SmallVector<Instruction*, 8> toErase;
  
-  // Top-down walk of the dominator tree
-  for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
-         E = df_end(DT.getRootNode()); DI != E; ++DI) {
-
-    BasicBlock* BB = DI->getBlock();
+  // Walk all instructions in the function
+  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
           BI != BE;) {
-      changed_function |= processInstruction(BI, toErase);
-      if (toErase.empty()) {
-        ++BI;
-        continue;
-      }
+      // Avoid invalidating the iterator
+      Instruction* I = BI++;
        
-      // If we need some instructions deleted, do it now.
-      NumMemCpyInstr += toErase.size();
-      
-      // Avoid iterator invalidation.
-      bool AtStart = BI == BB->begin();
-      if (!AtStart)
-        --BI;
-
-      for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
-           E = toErase.end(); I != E; ++I)
-        (*I)->eraseFromParent();
-
-      if (AtStart)
-        BI = BB->begin();
-      else
-        ++BI;
-      
-      toErase.clear();
+      if (StoreInst *SI = dyn_cast<StoreInst>(I))
+        changed_function |= processStore(SI, BI);
+      else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+        changed_function |= processMemCpy(M);
+      }
      }
    }