[SROA] Yet another slight refactoring that simplifies an API in the

[oota-llvm.git] / lib / Transforms / Scalar / SROA.cpp
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index ff08401377024a7d8ce0000ad67f613ab45cec7e..85d279826832e0db8beade65a874adacc03d6cee 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -51,10 +51,17 @@
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TimeValue.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  #include "llvm/Transforms/Utils/SSAUpdater.h"
+
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+// We only use this for a debug check in C++11
+#include <random>
+#endif
+
  using namespace llvm;
  
  STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
@@ -73,6 +80,16 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates");
  static cl::opt<bool>
  ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
  
+/// Hidden option to enable randomly shuffling the slices to help uncover
+/// instability in their order.
+static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
+                                             cl::init(false), cl::Hidden);
+
+/// Hidden option to experiment with completely strict handling of inbounds
+/// GEPs.
+static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds",
+                                        cl::init(false), cl::Hidden);
+
  namespace {
  /// \brief A custom IRBuilder inserter which prefixes all names if they are
  /// preserved.
@@ -339,7 +356,7 @@ private:
                   bool IsSplittable = false) {
      // Completely skip uses which have a zero size or start either before or
      // past the end of the allocation.
-    if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
+    if (Size == 0 || Offset.uge(AllocSize)) {
        DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
                     << " which has zero size or starts outside of the "
                     << AllocSize << " byte alloca:\n"
@@ -380,6 +397,43 @@ private:
      if (GEPI.use_empty())
        return markAsDead(GEPI);
  
+    if (SROAStrictInbounds && GEPI.isInBounds()) {
+      // FIXME: This is a manually un-factored variant of the basic code inside
+      // of GEPs with checking of the inbounds invariant specified in the
+      // langref in a very strict sense. If we ever want to enable
+      // SROAStrictInbounds, this code should be factored cleanly into
+      // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
+      // by writing out the code here where we have the underlying allocation
+      // size readily available.
+      APInt GEPOffset = Offset;
+      for (gep_type_iterator GTI = gep_type_begin(GEPI),
+                             GTE = gep_type_end(GEPI);
+           GTI != GTE; ++GTI) {
+        ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+        if (!OpC)
+          break;
+
+        // Handle a struct index, which adds its field offset to the pointer.
+        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+          unsigned ElementIdx = OpC->getZExtValue();
+          const StructLayout *SL = DL.getStructLayout(STy);
+          GEPOffset +=
+              APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
+        } else {
+          // For array or vector indices, scale the index by the size of the type.
+          APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
+          GEPOffset += Index * APInt(Offset.getBitWidth(),
+                                     DL.getTypeAllocSize(GTI.getIndexedType()));
+        }
+
+        // If this index has computed an intermediate pointer which is not
+        // inbounds, then the result of the GEP is a poison value and we can
+        // delete it and all uses.
+        if (GEPOffset.ugt(AllocSize))
+          return markAsDead(GEPI);
+      }
+    }
+
      return Base::visitGetElementPtrInst(GEPI);
    }
  
@@ -426,8 +480,7 @@ private:
      // risk of overflow.
      // FIXME: We should instead consider the pointer to have escaped if this
      // function is being instrumented for addressing bugs or race conditions.
-    if (Offset.isNegative() || Size > AllocSize ||
-        Offset.ugt(AllocSize - Size)) {
+    if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
        DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
                     << " which extends past the end of the " << AllocSize
                     << " byte alloca:\n"
@@ -446,7 +499,7 @@ private:
      assert(II.getRawDest() == *U && "Pointer use is not the destination?");
      ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
      if ((Length && Length->getValue() == 0) ||
-        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+        (IsOffsetKnown && Offset.uge(AllocSize)))
        // Zero-length mem transfer intrinsics can be ignored entirely.
        return markAsDead(II);
  
@@ -478,7 +531,7 @@ private:
      // if already added to our partitions.
      // FIXME: Yet another place we really should bypass this when
      // instrumenting for ASan.
-    if (!Offset.isNegative() && Offset.uge(AllocSize)) {
+    if (Offset.uge(AllocSize)) {
        SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
        if (MTPI != MemTransferSliceMap.end())
          S.Slices[MTPI->second].kill();
@@ -613,8 +666,7 @@ private:
      // themselves which should be replaced with undef.
      // FIXME: This should instead be escaped in the event we're instrumenting
      // for address sanitization.
-    if ((Offset.isNegative() && (-Offset).uge(PHISize)) ||
-        (!Offset.isNegative() && Offset.uge(AllocSize))) {
+    if (Offset.uge(AllocSize)) {
        S.DeadOperands.push_back(U);
        return;
      }
@@ -654,8 +706,7 @@ private:
      // themselves which should be replaced with undef.
      // FIXME: This should instead be escaped in the event we're instrumenting
      // for address sanitization.
-    if ((Offset.isNegative() && Offset.uge(SelectSize)) ||
-        (!Offset.isNegative() && Offset.uge(AllocSize))) {
+    if (Offset.uge(AllocSize)) {
        S.DeadOperands.push_back(U);
        return;
      }
@@ -690,6 +741,13 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
                                std::mem_fun_ref(&Slice::isDead)),
                 Slices.end());
  
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+  if (SROARandomShuffleSlices) {
+    std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
+    std::shuffle(Slices.begin(), Slices.end(), MT);
+  }
+#endif
+
    // Sort the uses. This arranges for the offsets to be in ascending order,
    // and the sizes to be in descending order.
    std::sort(Slices.begin(), Slices.end());
@@ -957,7 +1015,11 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
                              AllocaSlices::const_iterator E,
                              uint64_t EndOffset) {
    Type *Ty = 0;
-  bool IgnoreNonIntegralTypes = false;
+  bool TyIsCommon = true;
+  IntegerType *ITy = 0;
+
+  // Note that we need to look at *every* alloca slice's Use to ensure we
+  // always get consistent results regardless of the order of slices.
    for (AllocaSlices::const_iterator I = B; I != E; ++I) {
      Use *U = I->getUse();
      if (isa<IntrinsicInst>(*U->getUser()))
@@ -970,37 +1032,30 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
        UserTy = LI->getType();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
        UserTy = SI->getValueOperand()->getType();
-    } else {
-      IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
-      continue;
      }
  
-    if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+    if (!UserTy || (Ty && Ty != UserTy))
+      TyIsCommon = false; // Give up on anything but an iN type.
+    else
+      Ty = UserTy;
+
+    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
        // If the type is larger than the partition, skip it. We only encounter
        // this for split integer operations where we want to use the type of the
        // entity causing the split. Also skip if the type is not a byte width
        // multiple.
-      if (ITy->getBitWidth() % 8 != 0 ||
-          ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+      if (UserITy->getBitWidth() % 8 != 0 ||
+          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
          continue;
  
-      // If we have found an integer type use covering the alloca, use that
-      // regardless of the other types, as integers are often used for
-      // a "bucket of bits" type.
-      //
-      // NB: This *must* be the only return from inside the loop so that the
-      // order of slices doesn't impact the computed type.
-      return ITy;
-    } else if (IgnoreNonIntegralTypes) {
-      continue;
+      // Track the largest bitwidth integer type used in this way in case there
+      // is no common type.
+      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
+        ITy = UserITy;
      }
-
-    if (Ty && Ty != UserTy)
-      IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
-
-    Ty = UserTy;
    }
-  return Ty;
+
+  return TyIsCommon ? Ty : ITy;
  }
  
  /// PHI instructions that use an alloca and are subsequently loaded can be
@@ -1207,7 +1262,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
  /// This will return the BasePtr if that is valid, or build a new GEP
  /// instruction using the IRBuilder if GEP-ing is needed.
  static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
-                       SmallVectorImpl<Value *> &Indices) {
+                       SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
    if (Indices.empty())
      return BasePtr;
  
@@ -1216,7 +1271,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
    if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
      return BasePtr;
  
-  return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
+  return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
  }
  
  /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1230,9 +1285,10 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
  /// indicated by Indices to have the correct offset.
  static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
                                      Value *BasePtr, Type *Ty, Type *TargetTy,
-                                    SmallVectorImpl<Value *> &Indices) {
+                                    SmallVectorImpl<Value *> &Indices,
+                                    Twine NamePrefix) {
    if (Ty == TargetTy)
-    return buildGEP(IRB, BasePtr, Indices);
+    return buildGEP(IRB, BasePtr, Indices, NamePrefix);
  
    // See if we can descend into a struct and locate a field with the correct
    // type.
@@ -1259,7 +1315,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
    if (ElementTy != TargetTy)
      Indices.erase(Indices.end() - NumLayers, Indices.end());
  
-  return buildGEP(IRB, BasePtr, Indices);
+  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
  }
  
  /// \brief Recursively compute indices for a natural GEP.
@@ -1269,9 +1325,10 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
  static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
                                         Value *Ptr, Type *Ty, APInt &Offset,
                                         Type *TargetTy,
-                                       SmallVectorImpl<Value *> &Indices) {
+                                       SmallVectorImpl<Value *> &Indices,
+                                       Twine NamePrefix) {
    if (Offset == 0)
-    return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices);
+    return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, NamePrefix);
  
    // We can't recurse through pointer types.
    if (Ty->isPointerTy())
@@ -1291,7 +1348,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
      Offset -= NumSkippedElements * ElementSize;
      Indices.push_back(IRB.getInt(NumSkippedElements));
      return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
-                                    Offset, TargetTy, Indices);
+                                    Offset, TargetTy, Indices, NamePrefix);
    }
  
    if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1304,7 +1361,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
      Offset -= NumSkippedElements * ElementSize;
      Indices.push_back(IRB.getInt(NumSkippedElements));
      return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
-                                    Indices);
+                                    Indices, NamePrefix);
    }
  
    StructType *STy = dyn_cast<StructType>(Ty);
@@ -1323,7 +1380,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
  
    Indices.push_back(IRB.getInt32(Index));
    return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices);
+                                  Indices, NamePrefix);
  }
  
  /// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1338,7 +1395,8 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
  /// If no natural GEP can be constructed, this function returns null.
  static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
                                        Value *Ptr, APInt Offset, Type *TargetTy,
-                                      SmallVectorImpl<Value *> &Indices) {
+                                      SmallVectorImpl<Value *> &Indices,
+                                      Twine NamePrefix) {
    PointerType *Ty = cast<PointerType>(Ptr->getType());
  
    // Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1357,7 +1415,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
    Offset -= NumSkippedElements * ElementSize;
    Indices.push_back(IRB.getInt(NumSkippedElements));
    return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices);
+                                  Indices, NamePrefix);
  }
  
  /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1375,8 +1433,9 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
  /// properties. The algorithm tries to fold as many constant indices into
  /// a single GEP as possible, thus making each GEP more independent of the
  /// surrounding code.
-static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
-                             Value *Ptr, APInt Offset, Type *PointerTy) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
+                             APInt Offset, Type *PointerTy,
+                             Twine NamePrefix) {
    // Even though we don't look through PHI nodes, we could be called on an
    // instruction in an unreachable block, which may be on a cycle.
    SmallPtrSet<Value *, 4> Visited;
@@ -1410,7 +1469,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
      // See if we can perform a natural GEP here.
      Indices.clear();
      if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
-                                           Indices)) {
+                                           Indices, NamePrefix)) {
        if (P->getType() == PointerTy) {
          // Zap any offset pointer that we ended up computing in previous rounds.
          if (OffsetPtr && OffsetPtr->use_empty())
@@ -1445,19 +1504,19 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
    if (!OffsetPtr) {
      if (!Int8Ptr) {
        Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
-                                  "raw_cast");
+                                  NamePrefix + "sroa_raw_cast");
        Int8PtrOffset = Offset;
      }
  
      OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
        IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
-                            "raw_idx");
+                            NamePrefix + "sroa_raw_idx");
    }
    Ptr = OffsetPtr;
  
    // On the off chance we were targeting i8*, guard the bitcast here.
    if (Ptr->getType() != PointerTy)
-    Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
+    Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
  
    return Ptr;
  }
@@ -1950,16 +2009,22 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
    // integer type will be stored here for easy access during rewriting.
    IntegerType *IntTy;
  
-  // The offset of the slice currently being rewritten.
+  // The original offsets of the slice currently being rewritten relative to
+  // the original alloca.
    uint64_t BeginOffset, EndOffset;
+  // The new offsets of the slice currently being rewritten relative to the
+  // original alloca.
+  uint64_t NewBeginOffset, NewEndOffset;
+
+  uint64_t SliceSize;
    bool IsSplittable;
    bool IsSplit;
    Use *OldUse;
    Instruction *OldPtr;
  
-  // Output members carrying state about the result of visiting and rewriting
-  // the slice of the alloca.
-  bool IsUsedByRewrittenSpeculatableInstructions;
+  // Track post-rewrite users which are PHI nodes and Selects.
+  SmallPtrSetImpl<PHINode *> &PHIUsers;
+  SmallPtrSetImpl<SelectInst *> &SelectUsers;
  
    // Utility IR builder, whose name prefix is setup for each visited use, and
    // the insertion point is set to point to the user.
@@ -1968,11 +2033,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  public:
    AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
                        AllocaInst &OldAI, AllocaInst &NewAI,
-                      uint64_t NewBeginOffset, uint64_t NewEndOffset,
-                      bool IsVectorPromotable = false,
-                      bool IsIntegerPromotable = false)
+                      uint64_t NewAllocaBeginOffset,
+                      uint64_t NewAllocaEndOffset, bool IsVectorPromotable,
+                      bool IsIntegerPromotable,
+                      SmallPtrSetImpl<PHINode *> &PHIUsers,
+                      SmallPtrSetImpl<SelectInst *> &SelectUsers)
        : DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
-        NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset),
+        NewAllocaBeginOffset(NewAllocaBeginOffset),
+        NewAllocaEndOffset(NewAllocaEndOffset),
          NewAllocaTy(NewAI.getAllocatedType()),
          VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
          ElementTy(VecTy ? VecTy->getElementType() : 0),
@@ -1983,7 +2051,7 @@ public:
                          DL.getTypeSizeInBits(NewAI.getAllocatedType()))
                    : 0),
          BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
-        OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
+        OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
          IRB(NewAI.getContext(), ConstantFolder()) {
      if (VecTy) {
        assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
@@ -2002,6 +2070,14 @@ public:
      IsSplit =
          BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
  
+    // Compute the intersecting offset range.
+    assert(BeginOffset < NewAllocaEndOffset);
+    assert(EndOffset > NewAllocaBeginOffset);
+    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
+    SliceSize = NewEndOffset - NewBeginOffset;
+
      OldUse = I->getUse();
      OldPtr = cast<Instruction>(OldUse->get());
  
@@ -2016,20 +2092,6 @@ public:
      return CanSROA;
    }
  
-  /// \brief Query whether this slice is used by speculatable instructions after
-  /// rewriting.
-  ///
-  /// These instructions (PHIs and Selects currently) require the alloca slice
-  /// to run back through the rewriter. Thus, they are promotable, but not on
-  /// this iteration. This is distinct from a slice which is unpromotable for
-  /// some other reason, in which case we don't even want to perform the
-  /// speculation. This can be querried at any time and reflects whether (at
-  /// that point) a visit call has rewritten a speculatable instruction on the
-  /// current slice.
-  bool isUsedByRewrittenSpeculatableInstructions() const {
-    return IsUsedByRewrittenSpeculatableInstructions;
-  }
-
  private:
    // Make sure the other visit overloads are visible.
    using Base::visit;
@@ -2040,30 +2102,53 @@ private:
      llvm_unreachable("No rewrite rule for this instruction!");
    }
  
-  Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, uint64_t Offset,
-                              Type *PointerTy) {
-    assert(Offset >= NewAllocaBeginOffset);
-    return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getPointerSizeInBits(),
-                                                 Offset - NewAllocaBeginOffset),
-                          PointerTy);
+  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
+    // Note that the offset computation can use BeginOffset or NewBeginOffset
+    // interchangeably for unsplit slices.
+    assert(IsSplit || BeginOffset == NewBeginOffset);
+    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+
+#ifndef NDEBUG
+    StringRef OldName = OldPtr->getName();
+    // Skip through the last '.sroa.' component of the name.
+    size_t LastSROAPrefix = OldName.rfind(".sroa.");
+    if (LastSROAPrefix != StringRef::npos) {
+      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
+      // Look for an SROA slice index.
+      size_t IndexEnd = OldName.find_first_not_of("0123456789");
+      if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
+        // Strip the index and look for the offset.
+        OldName = OldName.substr(IndexEnd + 1);
+        size_t OffsetEnd = OldName.find_first_not_of("0123456789");
+        if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
+          // Strip the offset.
+          OldName = OldName.substr(OffsetEnd + 1);
+      }
+    }
+    // Strip any SROA suffixes as well.
+    OldName = OldName.substr(0, OldName.find(".sroa_"));
+#endif
+
+    return getAdjustedPtr(IRB, DL, &NewAI,
+                          APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
+#ifndef NDEBUG
+                          Twine(OldName) + "."
+#else
+                          Twine()
+#endif
+                          );
    }
  
-  /// \brief Compute suitable alignment to access an offset into the new alloca.
-  unsigned getOffsetAlign(uint64_t Offset) {
+  /// \brief Compute suitable alignment to access this slice of the *new* alloca.
+  ///
+  /// You can optionally pass a type to this routine and if that type's ABI
+  /// alignment is itself suitable, this will return zero.
+  unsigned getSliceAlign(Type *Ty = 0) {
      unsigned NewAIAlign = NewAI.getAlignment();
      if (!NewAIAlign)
        NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
-    return MinAlign(NewAIAlign, Offset);
-  }
-
-  /// \brief Compute suitable alignment to access a type at an offset of the
-  /// new alloca.
-  ///
-  /// \returns zero if the type's ABI alignment is a suitable alignment,
-  /// otherwise returns the maximal suitable alignment.
-  unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) {
-    unsigned Align = getOffsetAlign(Offset);
-    return Align == DL.getABITypeAlignment(Ty) ? 0 : Align;
+    unsigned Align = MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+    return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
    }
  
    unsigned getIndex(uint64_t Offset) {
@@ -2081,8 +2166,7 @@ private:
        Pass.DeadInsts.insert(I);
    }
  
-  Value *rewriteVectorizedLoadInst(uint64_t NewBeginOffset,
-                                   uint64_t NewEndOffset) {
+  Value *rewriteVectorizedLoadInst() {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
@@ -2092,8 +2176,7 @@ private:
      return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
    }
  
-  Value *rewriteIntegerLoad(LoadInst &LI, uint64_t NewBeginOffset,
-                            uint64_t NewEndOffset) {
+  Value *rewriteIntegerLoad(LoadInst &LI) {
      assert(IntTy && "We cannot insert an integer to the alloca");
      assert(!LI.isVolatile());
      Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2112,32 +2195,23 @@ private:
      Value *OldOp = LI.getOperand(0);
      assert(OldOp == OldPtr);
  
-    // Compute the intersecting offset range.
-    assert(BeginOffset < NewAllocaEndOffset);
-    assert(EndOffset > NewAllocaBeginOffset);
-    uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
-    uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
-    uint64_t Size = NewEndOffset - NewBeginOffset;
-
-    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                               : LI.getType();
      bool IsPtrAdjusted = false;
      Value *V;
      if (VecTy) {
-      V = rewriteVectorizedLoadInst(NewBeginOffset, NewEndOffset);
+      V = rewriteVectorizedLoadInst();
      } else if (IntTy && LI.getType()->isIntegerTy()) {
-      V = rewriteIntegerLoad(LI, NewBeginOffset, NewEndOffset);
+      V = rewriteIntegerLoad(LI);
      } else if (NewBeginOffset == NewAllocaBeginOffset &&
                 canConvertValue(DL, NewAllocaTy, LI.getType())) {
        V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                LI.isVolatile(), "load");
+                                LI.isVolatile(), LI.getName());
      } else {
        Type *LTy = TargetTy->getPointerTo();
-      V = IRB.CreateAlignedLoad(
-          getAdjustedAllocaPtr(IRB, NewBeginOffset, LTy),
-          getOffsetTypeAlign(TargetTy, NewBeginOffset - NewAllocaBeginOffset),
-          LI.isVolatile(), "load");
+      V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+                                getSliceAlign(TargetTy), LI.isVolatile(),
+                                LI.getName());
        IsPtrAdjusted = true;
      }
      V = convertValue(DL, IRB, V, TargetTy);
@@ -2146,7 +2220,7 @@ private:
        assert(!LI.isVolatile());
        assert(LI.getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
-      assert(Size < DL.getTypeStoreSize(LI.getType()) &&
+      assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
               "Split load isn't smaller than original load");
        assert(LI.getType()->getIntegerBitWidth() ==
               DL.getTypeStoreSizeInBits(LI.getType()) &&
@@ -2174,9 +2248,7 @@ private:
      return !LI.isVolatile() && !IsPtrAdjusted;
    }
  
-  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
-                                  uint64_t NewBeginOffset,
-                                  uint64_t NewEndOffset) {
+  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
      if (V->getType() != VecTy) {
        unsigned BeginIndex = getIndex(NewBeginOffset);
        unsigned EndIndex = getIndex(NewEndOffset);
@@ -2202,8 +2274,7 @@ private:
      return true;
    }
  
-  bool rewriteIntegerStore(Value *V, StoreInst &SI,
-                           uint64_t NewBeginOffset, uint64_t NewEndOffset) {
+  bool rewriteIntegerStore(Value *V, StoreInst &SI) {
      assert(IntTy && "We cannot extract an integer from the alloca");
      assert(!SI.isVolatile());
      if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
@@ -2236,30 +2307,22 @@ private:
        if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
          Pass.PostPromotionWorklist.insert(AI);
  
-    // Compute the intersecting offset range.
-    assert(BeginOffset < NewAllocaEndOffset);
-    assert(EndOffset > NewAllocaBeginOffset);
-    uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
-    uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
-    uint64_t Size = NewEndOffset - NewBeginOffset;
-    if (Size < DL.getTypeStoreSize(V->getType())) {
+    if (SliceSize < DL.getTypeStoreSize(V->getType())) {
        assert(!SI.isVolatile());
        assert(V->getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
        assert(V->getType()->getIntegerBitWidth() ==
               DL.getTypeStoreSizeInBits(V->getType()) &&
               "Non-byte-multiple bit width");
-      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
        V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset,
                           "extract");
      }
  
      if (VecTy)
-      return rewriteVectorizedStoreInst(V, SI, OldOp, NewBeginOffset,
-                                        NewEndOffset);
+      return rewriteVectorizedStoreInst(V, SI, OldOp);
      if (IntTy && V->getType()->isIntegerTy())
-      return rewriteIntegerStore(V, SI, NewBeginOffset, NewEndOffset);
+      return rewriteIntegerStore(V, SI);
  
      StoreInst *NewSI;
      if (NewBeginOffset == NewAllocaBeginOffset &&
@@ -2269,12 +2332,9 @@ private:
        NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                       SI.isVolatile());
      } else {
-      Value *NewPtr = getAdjustedAllocaPtr(IRB, NewBeginOffset,
-                                           V->getType()->getPointerTo());
-      NewSI = IRB.CreateAlignedStore(
-          V, NewPtr, getOffsetTypeAlign(
-                         V->getType(), NewBeginOffset - NewAllocaBeginOffset),
-          SI.isVolatile());
+      Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
+      NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
+                                     SI.isVolatile());
      }
      (void)NewSI;
      Pass.DeadInsts.insert(&SI);
@@ -2326,11 +2386,10 @@ private:
      // pointer to the new alloca.
      if (!isa<Constant>(II.getLength())) {
        assert(!IsSplit);
-      assert(BeginOffset >= NewAllocaBeginOffset);
-      II.setDest(
-          getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+      assert(NewBeginOffset == BeginOffset);
+      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
        Type *CstTy = II.getAlignmentCst()->getType();
-      II.setAlignment(ConstantInt::get(CstTy, getOffsetAlign(BeginOffset)));
+      II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
  
        deleteIfTriviallyDead(OldPtr);
        return false;
@@ -2342,13 +2401,6 @@ private:
      Type *AllocaTy = NewAI.getAllocatedType();
      Type *ScalarTy = AllocaTy->getScalarType();
  
-    // Compute the intersecting offset range.
-    assert(BeginOffset < NewAllocaEndOffset);
-    assert(EndOffset > NewAllocaBeginOffset);
-    uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
-    uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-    uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
-
      // If this doesn't map cleanly onto the alloca type, and that type isn't
      // a single value type, just emit a memset.
      if (!VecTy && !IntTy &&
@@ -2360,8 +2412,8 @@ private:
        Type *SizeTy = II.getLength()->getType();
        Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
        CallInst *New = IRB.CreateMemSet(
-          getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getRawDest()->getType()),
-          II.getValue(), Size, getOffsetAlign(SliceOffset), II.isVolatile());
+          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
+          getSliceAlign(), II.isVolatile());
        (void)New;
        DEBUG(dbgs() << "          to: " << *New << "\n");
        return false;
@@ -2438,25 +2490,18 @@ private:
  
      DEBUG(dbgs() << "    original: " << II << "\n");
  
-    // Compute the intersecting offset range.
-    assert(BeginOffset < NewAllocaEndOffset);
-    assert(EndOffset > NewAllocaBeginOffset);
-    uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
-    uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
-    assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
-    bool IsDest = II.getRawDest() == OldPtr;
+    bool IsDest = &II.getRawDestUse() == OldUse;
+    assert((IsDest && II.getRawDest() == OldPtr) ||
+           (!IsDest && II.getRawSource() == OldPtr));
  
      // Compute the relative offset within the transfer.
      unsigned IntPtrWidth = DL.getPointerSizeInBits();
      APInt RelOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
  
      unsigned Align = II.getAlignment();
-    uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
      if (Align > 1)
-      Align =
-          MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
-                   MinAlign(II.getAlignment(), getOffsetAlign(SliceOffset)));
+      Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+                       MinAlign(II.getAlignment(), getSliceAlign()));
  
      // For unsplit intrinsics, we simply modify the source and destination
      // pointers in place. This isn't just an optimization, it is a matter of
@@ -2466,19 +2511,17 @@ private:
      // memcpy, and so simply updating the pointers is the necessary for us to
      // update both source and dest of a single call.
      if (!IsSplittable) {
-      Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
+      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
        if (IsDest)
-        II.setDest(
-            getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+        II.setDest(AdjustedPtr);
        else
-        II.setSource(getAdjustedAllocaPtr(IRB, BeginOffset,
-                                          II.getRawSource()->getType()));
+        II.setSource(AdjustedPtr);
  
        Type *CstTy = II.getAlignmentCst()->getType();
        II.setAlignment(ConstantInt::get(CstTy, Align));
  
        DEBUG(dbgs() << "          to: " << II << "\n");
-      deleteIfTriviallyDead(OldOp);
+      deleteIfTriviallyDead(OldPtr);
        return false;
      }
      // For split transfer intrinsics we have an incredibly useful assurance:
@@ -2521,16 +2564,14 @@ private:
      }
  
      if (EmitMemCpy) {
-      Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
-                                : II.getRawDest()->getType();
+      Type *OtherPtrTy = OtherPtr->getType();
  
        // Compute the other pointer, folding as much as possible to produce
        // a single, simple GEP in most cases.
-      OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+      OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy,
+                                OtherPtr->getName() + ".");
  
-      Value *OurPtr = getAdjustedAllocaPtr(
-          IRB, NewBeginOffset,
-          IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType());
+      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
        Type *SizeTy = II.getLength()->getType();
        Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
  
@@ -2569,7 +2610,8 @@ private:
        OtherPtrTy = SubIntTy->getPointerTo();
      }
  
-    Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+    Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy,
+                                   OtherPtr->getName() + ".");
      Value *DstPtr = &NewAI;
      if (!IsDest)
        std::swap(SrcPtr, DstPtr);
@@ -2616,20 +2658,13 @@ private:
      DEBUG(dbgs() << "    original: " << II << "\n");
      assert(II.getArgOperand(1) == OldPtr);
  
-    // Compute the intersecting offset range.
-    assert(BeginOffset < NewAllocaEndOffset);
-    assert(EndOffset > NewAllocaBeginOffset);
-    uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
-    uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
      // Record this instruction for deletion.
      Pass.DeadInsts.insert(&II);
  
      ConstantInt *Size
        = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
                           NewEndOffset - NewBeginOffset);
-    Value *Ptr =
-        getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getArgOperand(1)->getType());
+    Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Value *New;
      if (II.getIntrinsicID() == Intrinsic::lifetime_start)
        New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -2650,28 +2685,22 @@ private:
      // as local as possible to the PHI. To do that, we re-use the location of
      // the old pointer, which necessarily must be in the right position to
      // dominate the PHI.
-    IRBuilderTy PtrBuilder(OldPtr);
-    PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
-                             ".");
+    IRBuilderTy PtrBuilder(IRB);
+    PtrBuilder.SetInsertPoint(OldPtr);
+    PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
  
-    Value *NewPtr =
-        getAdjustedAllocaPtr(PtrBuilder, BeginOffset, OldPtr->getType());
+    Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
      // Replace the operands which were using the old pointer.
      std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
  
      DEBUG(dbgs() << "          to: " << PN << "\n");
      deleteIfTriviallyDead(OldPtr);
  
-    // Check whether we can speculate this PHI node, and if so remember that
-    // fact and queue it up for another iteration after the speculation
-    // occurs.
-    if (isSafePHIToSpeculate(PN, &DL)) {
-      Pass.SpeculatablePHIs.insert(&PN);
-      IsUsedByRewrittenSpeculatableInstructions = true;
-      return true;
-    }
-
-    return false; // PHIs can't be promoted on their own.
+    // PHIs can't be promoted on their own, but often can be speculated. We
+    // check the speculation outside of the rewriter so that we see the
+    // fully-rewritten alloca.
+    PHIUsers.insert(&PN);
+    return true;
    }
  
    bool visitSelectInst(SelectInst &SI) {
@@ -2681,7 +2710,7 @@ private:
      assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
      assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
  
-    Value *NewPtr = getAdjustedAllocaPtr(IRB, BeginOffset, OldPtr->getType());
+    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      // Replace the operands which were using the old pointer.
      if (SI.getOperand(1) == OldPtr)
        SI.setOperand(1, NewPtr);
@@ -2691,16 +2720,11 @@ private:
      DEBUG(dbgs() << "          to: " << SI << "\n");
      deleteIfTriviallyDead(OldPtr);
  
-    // Check whether we can speculate this select instruction, and if so
-    // remember that fact and queue it up for another iteration after the
-    // speculation occurs.
-    if (isSafeSelectToSpeculate(SI, &DL)) {
-      Pass.SpeculatableSelects.insert(&SI);
-      IsUsedByRewrittenSpeculatableInstructions = true;
-      return true;
-    }
-
-    return false; // Selects can't be promoted on their own.
+    // Selects can't be promoted on their own, but often can be speculated. We
+    // check the speculation outside of the rewriter so that we see the
+    // fully-rewritten alloca.
+    SelectUsers.insert(&SI);
+    return true;
    }
  
  };
@@ -3136,17 +3160,17 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
                 << "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
                 << "\n");
  
-  // Track the high watermark on several worklists that are only relevant for
+  // Track the high watermark on the worklist as it is only relevant for
    // promoted allocas. We will reset it to this point if the alloca is not in
    // fact scheduled for promotion.
    unsigned PPWOldSize = PostPromotionWorklist.size();
-  unsigned SPOldSize = SpeculatablePHIs.size();
-  unsigned SSOldSize = SpeculatableSelects.size();
    unsigned NumUses = 0;
+  SmallPtrSet<PHINode *, 8> PHIUsers;
+  SmallPtrSet<SelectInst *, 8> SelectUsers;
  
    AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
                                 EndOffset, IsVectorPromotable,
-                               IsIntegerPromotable);
+                               IsIntegerPromotable, PHIUsers, SelectUsers);
    bool Promotable = true;
    for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
                                                          SUE = SplitUses.end();
@@ -3167,33 +3191,55 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
    MaxUsesPerAllocaPartition =
        std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
  
-  if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
-    DEBUG(dbgs() << "  and queuing for promotion\n");
-    PromotableAllocas.push_back(NewAI);
-  } else if (NewAI != &AI ||
-             (Promotable &&
-              Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
+  // Now that we've processed all the slices in the new partition, check if any
+  // PHIs or Selects would block promotion.
+  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+                                            E = PHIUsers.end();
+       I != E; ++I)
+    if (!isSafePHIToSpeculate(**I, DL)) {
+      Promotable = false;
+      PHIUsers.clear();
+      SelectUsers.clear();
+      break;
+    }
+  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+                                               E = SelectUsers.end();
+       I != E; ++I)
+    if (!isSafeSelectToSpeculate(**I, DL)) {
+      Promotable = false;
+      PHIUsers.clear();
+      SelectUsers.clear();
+      break;
+    }
+
+  if (Promotable) {
+    if (PHIUsers.empty() && SelectUsers.empty()) {
+      // Promote the alloca.
+      PromotableAllocas.push_back(NewAI);
+    } else {
+      // If we have either PHIs or Selects to speculate, add them to those
+      // worklists and re-queue the new alloca so that we promote it on the
+      // next iteration.
+      for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+                                                E = PHIUsers.end();
+           I != E; ++I)
+        SpeculatablePHIs.insert(*I);
+      for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+                                                   E = SelectUsers.end();
+           I != E; ++I)
+        SpeculatableSelects.insert(*I);
+      Worklist.insert(NewAI);
+    }
+  } else {
      // If we can't promote the alloca, iterate on it to check for new
      // refinements exposed by splitting the current alloca. Don't iterate on an
      // alloca which didn't actually change and didn't get promoted.
-    //
-    // Alternatively, if we could promote the alloca but have speculatable
-    // instructions then we will speculate them after finishing our processing
-    // of the original alloca. Mark the new one for re-visiting in the next
-    // iteration so the speculated operations can be rewritten.
-    //
-    // FIXME: We should actually track whether the rewriter changed anything.
-    Worklist.insert(NewAI);
-  }
-
-  // Drop any post-promotion work items if promotion didn't happen.
-  if (!Promotable) {
+    if (NewAI != &AI)
+      Worklist.insert(NewAI);
+
+    // Drop any post-promotion work items if promotion didn't happen.
      while (PostPromotionWorklist.size() > PPWOldSize)
        PostPromotionWorklist.pop_back();
-    while (SpeculatablePHIs.size() > SPOldSize)
-      SpeculatablePHIs.pop_back();
-    while (SpeculatableSelects.size() > SSOldSize)
-      SpeculatableSelects.pop_back();
    }
  
    return true;
@@ -3575,13 +3621,17 @@ namespace {
  }
  
  bool SROA::runOnFunction(Function &F) {
+  if (skipOptnoneFunction(F))
+    return false;
+
    DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
    C = &F.getContext();
-  DL = getAnalysisIfAvailable<DataLayout>();
-  if (!DL) {
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  if (!DLP) {
      DEBUG(dbgs() << "  Skipping SROA -- no target data!\n");
      return false;
    }
+  DL = &DLP->getDataLayout();
    DominatorTreeWrapperPass *DTWP =
        getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    DT = DTWP ? &DTWP->getDomTree() : 0;