[SROA] Thread the ability to add a pointer-specific name prefix through the pointer adjustment helpers (getAdjustedPtr and the natural GEP builders)

[oota-llvm.git] / lib / Transforms / Scalar / SROA.cpp
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index dcd8e5d15682d436d6fa84d7117c5c7ca7265b9e..33f7e1582ca60892db7f0f6654cdb1777d9616c0 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -29,7 +29,6 @@
  #include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/Loads.h"
  #include "llvm/Analysis/PtrUseVisitor.h"
  #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +37,7 @@
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
@@ -51,10 +51,17 @@
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TimeValue.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  #include "llvm/Transforms/Utils/SSAUpdater.h"
+
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+// We only use this for a debug check in C++11
+#include <random>
+#endif
+
  using namespace llvm;
  
  STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
@@ -73,6 +80,11 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates");
  static cl::opt<bool>
  ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
  
+/// Hidden option to enable randomly shuffling the slices to help uncover
+/// instability in their order.
+static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
+                                             cl::init(false), cl::Hidden);
+
  namespace {
  /// \brief A custom IRBuilder inserter which prefixes all names if they are
  /// preserved.
@@ -461,14 +473,30 @@ private:
  
    void visitMemTransferInst(MemTransferInst &II) {
      ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
-    if ((Length && Length->getValue() == 0) ||
-        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+    if (Length && Length->getValue() == 0)
        // Zero-length mem transfer intrinsics can be ignored entirely.
        return markAsDead(II);
  
+    // Because we can visit these intrinsics twice, also check to see if the
+    // first visit marked this instruction as dead. If so, skip it.
+    if (VisitedDeadInsts.count(&II))
+      return;
+
      if (!IsOffsetKnown)
        return PI.setAborted(&II);
  
+    // This side of the transfer is completely out-of-bounds, and so we can
+    // nuke the entire transfer. However, we also need to nuke the other side
+    // if already added to our partitions.
+    // FIXME: Yet another place we really should bypass this when
+    // instrumenting for ASan.
+    if (!Offset.isNegative() && Offset.uge(AllocSize)) {
+      SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
+      if (MTPI != MemTransferSliceMap.end())
+        S.Slices[MTPI->second].kill();
+      return markAsDead(II);
+    }
+
      uint64_t RawOffset = Offset.getLimitedValue();
      uint64_t Size = Length ? Length->getLimitedValue()
                             : AllocSize - RawOffset;
@@ -674,6 +702,13 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
                                std::mem_fun_ref(&Slice::isDead)),
                 Slices.end());
  
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+  if (SROARandomShuffleSlices) {
+    std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
+    std::shuffle(Slices.begin(), Slices.end(), MT);
+  }
+#endif
+
    // Sort the uses. This arranges for the offsets to be in ascending order,
    // and the sizes to be in descending order.
    std::sort(Slices.begin(), Slices.end());
@@ -917,6 +952,7 @@ private:
                          ArrayRef<AllocaSlices::iterator> SplitUses);
    bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
    bool runOnAlloca(AllocaInst &AI);
+  void clobberUse(Use &U);
    void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
    bool promoteAllocas(Function &F);
  };
@@ -930,7 +966,7 @@ FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
  
  INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
                        false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
                      false, false)
  
@@ -940,7 +976,11 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
                              AllocaSlices::const_iterator E,
                              uint64_t EndOffset) {
    Type *Ty = 0;
-  bool IgnoreNonIntegralTypes = false;
+  bool TyIsCommon = true;
+  IntegerType *ITy = 0;
+
+  // Note that we need to look at *every* alloca slice's Use to ensure we
+  // always get consistent results regardless of the order of slices.
    for (AllocaSlices::const_iterator I = B; I != E; ++I) {
      Use *U = I->getUse();
      if (isa<IntrinsicInst>(*U->getUser()))
@@ -953,37 +993,30 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
        UserTy = LI->getType();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
        UserTy = SI->getValueOperand()->getType();
-    } else {
-      IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
-      continue;
      }
  
-    if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+    if (!UserTy || (Ty && Ty != UserTy))
+      TyIsCommon = false; // Give up on anything but an iN type.
+    else
+      Ty = UserTy;
+
+    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
        // If the type is larger than the partition, skip it. We only encounter
        // this for split integer operations where we want to use the type of the
        // entity causing the split. Also skip if the type is not a byte width
        // multiple.
-      if (ITy->getBitWidth() % 8 != 0 ||
-          ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+      if (UserITy->getBitWidth() % 8 != 0 ||
+          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
          continue;
  
-      // If we have found an integer type use covering the alloca, use that
-      // regardless of the other types, as integers are often used for
-      // a "bucket of bits" type.
-      //
-      // NB: This *must* be the only return from inside the loop so that the
-      // order of slices doesn't impact the computed type.
-      return ITy;
-    } else if (IgnoreNonIntegralTypes) {
-      continue;
+      // Track the largest bitwidth integer type used in this way in case there
+      // is no common type.
+      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
+        ITy = UserITy;
      }
-
-    if (Ty && Ty != UserTy)
-      IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
-
-    Ty = UserTy;
    }
-  return Ty;
+
+  return TyIsCommon ? Ty : ITy;
  }
  
  /// PHI instructions that use an alloca and are subsequently loaded can be
@@ -1190,7 +1223,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
  /// This will return the BasePtr if that is valid, or build a new GEP
  /// instruction using the IRBuilder if GEP-ing is needed.
  static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
-                       SmallVectorImpl<Value *> &Indices) {
+                       SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
    if (Indices.empty())
      return BasePtr;
  
@@ -1199,7 +1232,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
    if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
      return BasePtr;
  
-  return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
+  return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
  }
  
  /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1213,9 +1246,10 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
  /// indicated by Indices to have the correct offset.
  static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
                                      Value *BasePtr, Type *Ty, Type *TargetTy,
-                                    SmallVectorImpl<Value *> &Indices) {
+                                    SmallVectorImpl<Value *> &Indices,
+                                    Twine NamePrefix) {
    if (Ty == TargetTy)
-    return buildGEP(IRB, BasePtr, Indices);
+    return buildGEP(IRB, BasePtr, Indices, NamePrefix);
  
    // See if we can descend into a struct and locate a field with the correct
    // type.
@@ -1242,7 +1276,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
    if (ElementTy != TargetTy)
      Indices.erase(Indices.end() - NumLayers, Indices.end());
  
-  return buildGEP(IRB, BasePtr, Indices);
+  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
  }
  
  /// \brief Recursively compute indices for a natural GEP.
@@ -1252,9 +1286,10 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
  static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
                                         Value *Ptr, Type *Ty, APInt &Offset,
                                         Type *TargetTy,
-                                       SmallVectorImpl<Value *> &Indices) {
+                                       SmallVectorImpl<Value *> &Indices,
+                                       Twine NamePrefix) {
    if (Offset == 0)
-    return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices);
+    return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, NamePrefix);
  
    // We can't recurse through pointer types.
    if (Ty->isPointerTy())
@@ -1274,7 +1309,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
      Offset -= NumSkippedElements * ElementSize;
      Indices.push_back(IRB.getInt(NumSkippedElements));
      return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
-                                    Offset, TargetTy, Indices);
+                                    Offset, TargetTy, Indices, NamePrefix);
    }
  
    if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1287,7 +1322,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
      Offset -= NumSkippedElements * ElementSize;
      Indices.push_back(IRB.getInt(NumSkippedElements));
      return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
-                                    Indices);
+                                    Indices, NamePrefix);
    }
  
    StructType *STy = dyn_cast<StructType>(Ty);
@@ -1306,7 +1341,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
  
    Indices.push_back(IRB.getInt32(Index));
    return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices);
+                                  Indices, NamePrefix);
  }
  
  /// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1321,7 +1356,8 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
  /// If no natural GEP can be constructed, this function returns null.
  static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
                                        Value *Ptr, APInt Offset, Type *TargetTy,
-                                      SmallVectorImpl<Value *> &Indices) {
+                                      SmallVectorImpl<Value *> &Indices,
+                                      Twine NamePrefix) {
    PointerType *Ty = cast<PointerType>(Ptr->getType());
  
    // Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1340,7 +1376,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
    Offset -= NumSkippedElements * ElementSize;
    Indices.push_back(IRB.getInt(NumSkippedElements));
    return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices);
+                                  Indices, NamePrefix);
  }
  
  /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1358,8 +1394,9 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
  /// properties. The algorithm tries to fold as many constant indices into
  /// a single GEP as possible, thus making each GEP more independent of the
  /// surrounding code.
-static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
-                             Value *Ptr, APInt Offset, Type *PointerTy) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
+                             APInt Offset, Type *PointerTy,
+                             Twine NamePrefix) {
    // Even though we don't look through PHI nodes, we could be called on an
    // instruction in an unreachable block, which may be on a cycle.
    SmallPtrSet<Value *, 4> Visited;
@@ -1393,7 +1430,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
      // See if we can perform a natural GEP here.
      Indices.clear();
      if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
-                                           Indices)) {
+                                           Indices, NamePrefix)) {
        if (P->getType() == PointerTy) {
          // Zap any offset pointer that we ended up computing in previous rounds.
          if (OffsetPtr && OffsetPtr->use_empty())
@@ -1428,19 +1465,19 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
    if (!OffsetPtr) {
      if (!Int8Ptr) {
        Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
-                                  "raw_cast");
+                                  NamePrefix + "sroa_raw_cast");
        Int8PtrOffset = Offset;
      }
  
      OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
        IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
-                            "raw_idx");
+                            NamePrefix + "sroa_raw_idx");
    }
    Ptr = OffsetPtr;
  
    // On the off chance we were targeting i8*, guard the bitcast here.
    if (Ptr->getType() != PointerTy)
-    Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
+    Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
  
    return Ptr;
  }
@@ -1940,9 +1977,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
    Use *OldUse;
    Instruction *OldPtr;
  
-  // Output members carrying state about the result of visiting and rewriting
-  // the slice of the alloca.
-  bool IsUsedByRewrittenSpeculatableInstructions;
+  // Track post-rewrite users which are PHI nodes and Selects.
+  SmallPtrSetImpl<PHINode *> &PHIUsers;
+  SmallPtrSetImpl<SelectInst *> &SelectUsers;
  
    // Utility IR builder, whose name prefix is setup for each visited use, and
    // the insertion point is set to point to the user.
@@ -1952,8 +1989,9 @@ public:
    AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
                        AllocaInst &OldAI, AllocaInst &NewAI,
                        uint64_t NewBeginOffset, uint64_t NewEndOffset,
-                      bool IsVectorPromotable = false,
-                      bool IsIntegerPromotable = false)
+                      bool IsVectorPromotable, bool IsIntegerPromotable,
+                      SmallPtrSetImpl<PHINode *> &PHIUsers,
+                      SmallPtrSetImpl<SelectInst *> &SelectUsers)
        : DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
          NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset),
          NewAllocaTy(NewAI.getAllocatedType()),
@@ -1966,7 +2004,7 @@ public:
                          DL.getTypeSizeInBits(NewAI.getAllocatedType()))
                    : 0),
          BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
-        OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
+        OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
          IRB(NewAI.getContext(), ConstantFolder()) {
      if (VecTy) {
        assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
@@ -1999,20 +2037,6 @@ public:
      return CanSROA;
    }
  
-  /// \brief Query whether this slice is used by speculatable instructions after
-  /// rewriting.
-  ///
-  /// These instructions (PHIs and Selects currently) require the alloca slice
-  /// to run back through the rewriter. Thus, they are promotable, but not on
-  /// this iteration. This is distinct from a slice which is unpromotable for
-  /// some other reason, in which case we don't even want to perform the
-  /// speculation. This can be querried at any time and reflects whether (at
-  /// that point) a visit call has rewritten a speculatable instruction on the
-  /// current slice.
-  bool isUsedByRewrittenSpeculatableInstructions() const {
-    return IsUsedByRewrittenSpeculatableInstructions;
-  }
-
  private:
    // Make sure the other visit overloads are visible.
    using Base::visit;
@@ -2026,9 +2050,35 @@ private:
    Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, uint64_t Offset,
                                Type *PointerTy) {
      assert(Offset >= NewAllocaBeginOffset);
+#ifndef NDEBUG
+    StringRef OldName = OldPtr->getName();
+    // Skip through the last '.sroa.' component of the name.
+    size_t LastSROAPrefix = OldName.rfind(".sroa.");
+    if (LastSROAPrefix != StringRef::npos) {
+      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
+      // Look for an SROA slice index.
+      size_t IndexEnd = OldName.find_first_not_of("0123456789");
+      if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
+        // Strip the index and look for the offset.
+        OldName = OldName.substr(IndexEnd + 1);
+        size_t OffsetEnd = OldName.find_first_not_of("0123456789");
+        if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
+          // Strip the offset.
+          OldName = OldName.substr(OffsetEnd + 1);
+      }
+    }
+    // Strip any SROA suffixes as well.
+    OldName = OldName.substr(0, OldName.find(".sroa_"));
+#endif
      return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getPointerSizeInBits(),
                                                   Offset - NewAllocaBeginOffset),
-                          PointerTy);
+                          PointerTy,
+#ifndef NDEBUG
+                          Twine(OldName) + "."
+#else
+                          Twine()
+#endif
+                          );
    }
  
    /// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2255,8 +2305,8 @@ private:
        Value *NewPtr = getAdjustedAllocaPtr(IRB, NewBeginOffset,
                                             V->getType()->getPointerTo());
        NewSI = IRB.CreateAlignedStore(
-          V, NewPtr, getOffsetTypeAlign(
-                         V->getType(), NewBeginOffset - NewAllocaBeginOffset),
+          V, NewPtr, getOffsetTypeAlign(V->getType(),
+                                        NewBeginOffset - NewAllocaBeginOffset),
            SI.isVolatile());
      }
      (void)NewSI;
@@ -2310,8 +2360,7 @@ private:
      if (!isa<Constant>(II.getLength())) {
        assert(!IsSplit);
        assert(BeginOffset >= NewAllocaBeginOffset);
-      II.setDest(
-          getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+      II.setDest(getAdjustedAllocaPtr(IRB, BeginOffset, OldPtr->getType()));
        Type *CstTy = II.getAlignmentCst()->getType();
        II.setAlignment(ConstantInt::get(CstTy, getOffsetAlign(BeginOffset)));
  
@@ -2343,7 +2392,7 @@ private:
        Type *SizeTy = II.getLength()->getType();
        Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
        CallInst *New = IRB.CreateMemSet(
-          getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getRawDest()->getType()),
+          getAdjustedAllocaPtr(IRB, NewBeginOffset, OldPtr->getType()),
            II.getValue(), Size, getOffsetAlign(SliceOffset), II.isVolatile());
        (void)New;
        DEBUG(dbgs() << "          to: " << *New << "\n");
@@ -2427,8 +2476,9 @@ private:
      uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
      uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
  
-    assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
-    bool IsDest = II.getRawDest() == OldPtr;
+    bool IsDest = &II.getRawDestUse() == OldUse;
+    assert((IsDest && II.getRawDest() == OldPtr) ||
+           (!IsDest && II.getRawSource() == OldPtr));
  
      // Compute the relative offset within the transfer.
      unsigned IntPtrWidth = DL.getPointerSizeInBits();
@@ -2449,19 +2499,18 @@ private:
      // memcpy, and so simply updating the pointers is the necessary for us to
      // update both source and dest of a single call.
      if (!IsSplittable) {
-      Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
+      Value *AdjustedPtr =
+          getAdjustedAllocaPtr(IRB, BeginOffset, OldPtr->getType());
        if (IsDest)
-        II.setDest(
-            getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+        II.setDest(AdjustedPtr);
        else
-        II.setSource(getAdjustedAllocaPtr(IRB, BeginOffset,
-                                          II.getRawSource()->getType()));
+        II.setSource(AdjustedPtr);
  
        Type *CstTy = II.getAlignmentCst()->getType();
        II.setAlignment(ConstantInt::get(CstTy, Align));
  
        DEBUG(dbgs() << "          to: " << II << "\n");
-      deleteIfTriviallyDead(OldOp);
+      deleteIfTriviallyDead(OldPtr);
        return false;
      }
      // For split transfer intrinsics we have an incredibly useful assurance:
@@ -2497,20 +2546,22 @@ private:
      // alloca that should be re-examined after rewriting this instruction.
      Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
      if (AllocaInst *AI
-          = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
+          = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
+      assert(AI != &OldAI && AI != &NewAI &&
+             "Splittable transfers cannot reach the same alloca on both ends.");
        Pass.Worklist.insert(AI);
+    }
  
      if (EmitMemCpy) {
-      Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
-                                : II.getRawDest()->getType();
+      Type *OtherPtrTy = OtherPtr->getType();
  
        // Compute the other pointer, folding as much as possible to produce
        // a single, simple GEP in most cases.
-      OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+      OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy,
+                                OtherPtr->getName() + ".");
  
-      Value *OurPtr = getAdjustedAllocaPtr(
-          IRB, NewBeginOffset,
-          IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType());
+      Value *OurPtr =
+          getAdjustedAllocaPtr(IRB, NewBeginOffset, OldPtr->getType());
        Type *SizeTy = II.getLength()->getType();
        Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
  
@@ -2549,7 +2600,8 @@ private:
        OtherPtrTy = SubIntTy->getPointerTo();
      }
  
-    Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+    Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy,
+                                   OtherPtr->getName() + ".");
      Value *DstPtr = &NewAI;
      if (!IsDest)
        std::swap(SrcPtr, DstPtr);
@@ -2608,8 +2660,7 @@ private:
      ConstantInt *Size
        = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
                           NewEndOffset - NewBeginOffset);
-    Value *Ptr =
-        getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getArgOperand(1)->getType());
+    Value *Ptr = getAdjustedAllocaPtr(IRB, NewBeginOffset, OldPtr->getType());
      Value *New;
      if (II.getIntrinsicID() == Intrinsic::lifetime_start)
        New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -2630,9 +2681,9 @@ private:
      // as local as possible to the PHI. To do that, we re-use the location of
      // the old pointer, which necessarily must be in the right position to
      // dominate the PHI.
-    IRBuilderTy PtrBuilder(OldPtr);
-    PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
-                             ".");
+    IRBuilderTy PtrBuilder(IRB);
+    PtrBuilder.SetInsertPoint(OldPtr);
+    PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
  
      Value *NewPtr =
          getAdjustedAllocaPtr(PtrBuilder, BeginOffset, OldPtr->getType());
@@ -2642,16 +2693,11 @@ private:
      DEBUG(dbgs() << "          to: " << PN << "\n");
      deleteIfTriviallyDead(OldPtr);
  
-    // Check whether we can speculate this PHI node, and if so remember that
-    // fact and queue it up for another iteration after the speculation
-    // occurs.
-    if (isSafePHIToSpeculate(PN, &DL)) {
-      Pass.SpeculatablePHIs.insert(&PN);
-      IsUsedByRewrittenSpeculatableInstructions = true;
-      return true;
-    }
-
-    return false; // PHIs can't be promoted on their own.
+    // PHIs can't be promoted on their own, but often can be speculated. We
+    // check the speculation outside of the rewriter so that we see the
+    // fully-rewritten alloca.
+    PHIUsers.insert(&PN);
+    return true;
    }
  
    bool visitSelectInst(SelectInst &SI) {
@@ -2671,16 +2717,11 @@ private:
      DEBUG(dbgs() << "          to: " << SI << "\n");
      deleteIfTriviallyDead(OldPtr);
  
-    // Check whether we can speculate this select instruction, and if so
-    // remember that fact and queue it up for another iteration after the
-    // speculation occurs.
-    if (isSafeSelectToSpeculate(SI, &DL)) {
-      Pass.SpeculatableSelects.insert(&SI);
-      IsUsedByRewrittenSpeculatableInstructions = true;
-      return true;
-    }
-
-    return false; // Selects can't be promoted on their own.
+    // Selects can't be promoted on their own, but often can be speculated. We
+    // check the speculation outside of the rewriter so that we see the
+    // fully-rewritten alloca.
+    SelectUsers.insert(&SI);
+    return true;
    }
  
  };
@@ -3116,17 +3157,17 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
                 << "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
                 << "\n");
  
-  // Track the high watermark on several worklists that are only relevant for
+  // Track the high watermark on the worklist as it is only relevant for
    // promoted allocas. We will reset it to this point if the alloca is not in
    // fact scheduled for promotion.
    unsigned PPWOldSize = PostPromotionWorklist.size();
-  unsigned SPOldSize = SpeculatablePHIs.size();
-  unsigned SSOldSize = SpeculatableSelects.size();
    unsigned NumUses = 0;
+  SmallPtrSet<PHINode *, 8> PHIUsers;
+  SmallPtrSet<SelectInst *, 8> SelectUsers;
  
    AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
                                 EndOffset, IsVectorPromotable,
-                               IsIntegerPromotable);
+                               IsIntegerPromotable, PHIUsers, SelectUsers);
    bool Promotable = true;
    for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
                                                          SUE = SplitUses.end();
@@ -3147,33 +3188,55 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
    MaxUsesPerAllocaPartition =
        std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
  
-  if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
-    DEBUG(dbgs() << "  and queuing for promotion\n");
-    PromotableAllocas.push_back(NewAI);
-  } else if (NewAI != &AI ||
-             (Promotable &&
-              Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
+  // Now that we've processed all the slices in the new partition, check if any
+  // PHIs or Selects would block promotion.
+  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+                                            E = PHIUsers.end();
+       I != E; ++I)
+    if (!isSafePHIToSpeculate(**I, DL)) {
+      Promotable = false;
+      PHIUsers.clear();
+      SelectUsers.clear();
+      break;
+    }
+  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+                                               E = SelectUsers.end();
+       I != E; ++I)
+    if (!isSafeSelectToSpeculate(**I, DL)) {
+      Promotable = false;
+      PHIUsers.clear();
+      SelectUsers.clear();
+      break;
+    }
+
+  if (Promotable) {
+    if (PHIUsers.empty() && SelectUsers.empty()) {
+      // Promote the alloca.
+      PromotableAllocas.push_back(NewAI);
+    } else {
+      // If we have either PHIs or Selects to speculate, add them to those
+      // worklists and re-queue the new alloca so that we promote it on the
+      // next iteration.
+      for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+                                                E = PHIUsers.end();
+           I != E; ++I)
+        SpeculatablePHIs.insert(*I);
+      for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+                                                   E = SelectUsers.end();
+           I != E; ++I)
+        SpeculatableSelects.insert(*I);
+      Worklist.insert(NewAI);
+    }
+  } else {
      // If we can't promote the alloca, iterate on it to check for new
      // refinements exposed by splitting the current alloca. Don't iterate on an
      // alloca which didn't actually change and didn't get promoted.
-    //
-    // Alternatively, if we could promote the alloca but have speculatable
-    // instructions then we will speculate them after finishing our processing
-    // of the original alloca. Mark the new one for re-visiting in the next
-    // iteration so the speculated operations can be rewritten.
-    //
-    // FIXME: We should actually track whether the rewriter changed anything.
-    Worklist.insert(NewAI);
-  }
-
-  // Drop any post-promotion work items if promotion didn't happen.
-  if (!Promotable) {
+    if (NewAI != &AI)
+      Worklist.insert(NewAI);
+
+    // Drop any post-promotion work items if promotion didn't happen.
      while (PostPromotionWorklist.size() > PPWOldSize)
        PostPromotionWorklist.pop_back();
-    while (SpeculatablePHIs.size() > SPOldSize)
-      SpeculatablePHIs.pop_back();
-    while (SpeculatableSelects.size() > SSOldSize)
-      SpeculatableSelects.pop_back();
    }
  
    return true;
@@ -3328,6 +3391,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
    return Changed;
  }
  
+/// \brief Clobber a use with undef, queuing the old value for deletion if dead.
+void SROA::clobberUse(Use &U) {
+  Value *OldV = U;
+  // Replace the use with an undef value.
+  U = UndefValue::get(OldV->getType());
+
+  // Check for this making an instruction dead. We have to garbage collect
+  // all the dead instructions to ensure the uses of any alloca end up being
+  // minimal.
+  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
+    if (isInstructionTriviallyDead(OldI)) {
+      DeadInsts.insert(OldI);
+    }
+}
+
  /// \brief Analyze an alloca for SROA.
  ///
  /// This analyzes the alloca to ensure we can reason about it, builds
@@ -3365,21 +3443,23 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
    for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
                                          DE = S.dead_user_end();
         DI != DE; ++DI) {
-    Changed = true;
+    // Free up everything used by this instruction.
+    for (User::op_iterator DOI = (*DI)->op_begin(), DOE = (*DI)->op_end();
+         DOI != DOE; ++DOI)
+      clobberUse(*DOI);
+
+    // Now replace the uses of this instruction.
      (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+
+    // And mark it for deletion.
      DeadInsts.insert(*DI);
+    Changed = true;
    }
    for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
                                        DE = S.dead_op_end();
         DO != DE; ++DO) {
-    Value *OldV = **DO;
-    // Clobber the use with an undef value.
-    **DO = UndefValue::get(OldV->getType());
-    if (Instruction *OldI = dyn_cast<Instruction>(OldV))
-      if (isInstructionTriviallyDead(OldI)) {
-        Changed = true;
-        DeadInsts.insert(OldI);
-      }
+    clobberUse(**DO);
+    Changed = true;
    }
  
    // No slices to split. Leave the dead alloca for a later pass to clean up.
@@ -3538,6 +3618,9 @@ namespace {
  }
  
  bool SROA::runOnFunction(Function &F) {
+  if (skipOptnoneFunction(F))
+    return false;
+
    DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
    C = &F.getContext();
    DL = getAnalysisIfAvailable<DataLayout>();
@@ -3545,7 +3628,9 @@ bool SROA::runOnFunction(Function &F) {
      DEBUG(dbgs() << "  Skipping SROA -- no target data!\n");
      return false;
    }
-  DT = getAnalysisIfAvailable<DominatorTree>();
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : 0;
  
    BasicBlock &EntryBB = F.getEntryBlock();
    for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
@@ -3587,6 +3672,6 @@ bool SROA::runOnFunction(Function &F) {
  
  void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
    if (RequiresDomTree)
-    AU.addRequired<DominatorTree>();
+    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }