Replace dyn_castGetElementPtr with dyn_cast<GEPOperator>.

[oota-llvm.git] / lib / Transforms / Scalar / MemCpyOptimizer.cpp
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp

index 6d27327991f10234a160007b1fa2b57da37ad1d5..c359e47d05d790dcb2a7ccc7f505b8278aa70073 100644 (file)
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -16,6 +16,7 @@
  #include "llvm/Transforms/Scalar.h"
  #include "llvm/IntrinsicInst.h"
  #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/Dominators.h"
@@ -35,7 +36,7 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
  /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
  /// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
  /// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
+static Value *isBytewiseValue(Value *V, LLVMContext& Context) {
    // All byte-wide stores are splatable, even of arbitrary variables.
    if (V->getType() == Type::Int8Ty) return V;
    
@@ -43,9 +44,9 @@ static Value *isBytewiseValue(Value *V) {
    // corresponding integer value is "byteable".  An important case is 0.0. 
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
      if (CFP->getType() == Type::FloatTy)
-      V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
+      V = Context.getConstantExprBitCast(CFP, Type::Int32Ty);
      if (CFP->getType() == Type::DoubleTy)
-      V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
+      V = Context.getConstantExprBitCast(CFP, Type::Int64Ty);
      // Don't handle long double formats, which have strange constraints.
    }
    
@@ -68,7 +69,7 @@ static Value *isBytewiseValue(Value *V) {
          if (Val != Val2)
            return 0;
        }
-      return ConstantInt::get(Val);
+      return ConstantInt::get(Context, Val);
      }
    }
    
@@ -104,7 +105,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
      
      // Otherwise, we have a sequential type like an array or vector.  Multiply
      // the index by the ElementSize.
-    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += Size*OpC->getSExtValue();
    }
  
@@ -277,7 +278,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
    // End.
    if (End > I->End) {
      I->End = End;
-    range_iterator NextI = I;;
+    range_iterator NextI = I;
      while (++NextI != E && End >= NextI->Start) {
        // Merge the range in.
        I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
@@ -345,12 +346,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
    // Ensure that the value being stored is something that is memset'able a
    // byte at a time, like "0" or "-1" of any width, as well as things like
    // 0xA0A0A0A0 and 0.0.
-  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0), SI->getContext());
    if (!ByteVal)
      return false;
  
    TargetData &TD = getAnalysis<TargetData>();
    AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  LLVMContext &Context = SI->getContext();
+  Module *M = SI->getParent()->getParent()->getParent();
  
    // Okay, so we now have a single store that can be splatable.  Scan to find
    // all subsequent stores of the same value to offset from the same pointer.
@@ -384,7 +387,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
      if (NextStore->isVolatile()) break;
      
      // Check to see if this stored value is of the same byte-splattable value.
-    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0), 
+                                   NextStore->getContext()))
        break;
  
      // Check to see if this store is to a constant offset from the start ptr.
@@ -429,8 +433,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
    
      if (MemSetF == 0) {
        const Type *Tys[] = {Type::Int64Ty};
-      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
-                                          ->getParent(), Intrinsic::memset,
+      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset,
                                            Tys, 1);
     }
      
@@ -438,15 +441,17 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
      StartPtr = Range.StartPtr;
    
      // Cast the start ptr to be i8* as memset requires.
-    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+    const Type *i8Ptr = Context.getPointerTypeUnqual(Type::Int8Ty);
      if (StartPtr->getType() != i8Ptr)
-      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                   InsertPt);
    
      Value *Ops[] = {
        StartPtr, ByteVal,   // Start, value
-      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size
-      ConstantInt::get(Type::Int32Ty, Range.Alignment)   // align
+      // size
+      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),
+      // align
+      ConstantInt::get(Type::Int32Ty, Range.Alignment)
      };
      Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
      DEBUG(cerr << "Replace stores:\n";
@@ -511,7 +516,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
    if (!srcArraySize)
      return false;
  
-  uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
+  uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
      srcArraySize->getZExtValue();
  
    if (cpyLength->getZExtValue() < srcSize)
@@ -526,7 +531,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
      if (!destArraySize)
        return false;
  
-    uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
+    uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
        destArraySize->getZExtValue();
  
      if (destSize < srcSize)
@@ -538,7 +543,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
        return false;
  
      const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
-    uint64_t destSize = TD.getABITypeSize(StructTy);
+    uint64_t destSize = TD.getTypeAllocSize(StructTy);
  
      if (destSize < srcSize)
        return false;
@@ -609,7 +614,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
    // Drop any cached information about the call, because we may have changed
    // its dependence information by changing its parameter.
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  MD.dropInstruction(C);
+  MD.removeInstruction(C);
  
    // Remove the memcpy
    MD.removeInstruction(cpy);
@@ -629,18 +634,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
    // There are two possible optimizations we can do for memcpy:
    //   a) memcpy-memcpy xform which exposes redundancy for DSE
    //   b) call-memcpy xform for return slot optimization
-  Instruction* dep = MD.getDependency(M);
-  if (dep == MemoryDependenceAnalysis::None ||
-      dep == MemoryDependenceAnalysis::NonLocal)
+  MemDepResult dep = MD.getDependency(M);
+  if (!dep.isClobber())
      return false;
-  else if (!isa<MemCpyInst>(dep)) {
-    if (CallInst* C = dyn_cast<CallInst>(dep))
+  if (!isa<MemCpyInst>(dep.getInst())) {
+    if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
        return performCallSlotOptzn(M, C);
-    else
-      return false;
+    return false;
    }
    
-  MemCpyInst* MDep = cast<MemCpyInst>(dep);
+  MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
    
    // We can only transform memcpys where the dest of one is the source of the
    // other
@@ -680,23 +683,19 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
                                   M->getParent()->getParent()->getParent(),
                                   M->getIntrinsicID(), Tys, 1);
      
-  std::vector<Value*> args;
-  args.push_back(M->getRawDest());
-  args.push_back(MDep->getRawSource());
-  args.push_back(M->getLength());
-  args.push_back(M->getAlignment());
+  Value *Args[4] = {
+    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+  };
    
-  CallInst* C = CallInst::Create(MemCpyFun, args.begin(), args.end(), "", M);
+  CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
    
    
    // If C and M don't interfere, then this is a valid transformation.  If they
    // did, this would mean that the two sources overlap, which would be bad.
-  if (MD.getDependency(C) == MDep) {
-    MD.dropInstruction(M);
+  if (MD.getDependency(C) == dep) {
+    MD.removeInstruction(M);
      M->eraseFromParent();
-    
      NumMemCpyInstr++;
-    
      return true;
    }
    
@@ -704,7 +703,6 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
    // inserted and act like nothing happened.
    MD.removeInstruction(C);
    C->eraseFromParent();
-  
    return false;
  }