Fix a typo (the the => the)

[oota-llvm.git] / lib / Transforms / Scalar / ObjCARC.cpp
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp

index c74c353e81a9b284944000b7e370311313be900a..64f71d380a8ae2305464bc832e5c382e76d6b3f2 100644 (file)
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -20,7 +20,7 @@
  // This file also defines a simple ARC-aware AliasAnalysis.
  //
  // WARNING: This file knows about certain library functions. It recognizes them
-// by name, and hardwires knowedge of their semantics.
+// by name, and hardwires knowledge of their semantics.
  //
  // WARNING: This file knows about how certain Objective-C library functions are
  // used. Naive LLVM IR transformations which would otherwise be
@@ -29,18 +29,8 @@
  //===----------------------------------------------------------------------===//
  
  #define DEBUG_TYPE "objc-arc"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CallSite.h"
  #include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/StringSwitch.h"
  #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
  using namespace llvm;
  
  // A handy option to enable/disable all optimizations in this file.
@@ -88,13 +78,14 @@ namespace {
      }
  #endif
  
-    ValueT &operator[](KeyT Arg) {
+    ValueT &operator[](const KeyT &Arg) {
        std::pair<typename MapTy::iterator, bool> Pair =
          Map.insert(std::make_pair(Arg, size_t(0)));
        if (Pair.second) {
-        Pair.first->second = Vector.size();
+        size_t Num = Vector.size();
+        Pair.first->second = Num;
          Vector.push_back(std::make_pair(Arg, ValueT()));
-        return Vector.back().second;
+        return Vector[Num].second;
        }
        return Vector[Pair.first->second].second;
      }
@@ -104,14 +95,15 @@ namespace {
        std::pair<typename MapTy::iterator, bool> Pair =
          Map.insert(std::make_pair(InsertPair.first, size_t(0)));
        if (Pair.second) {
-        Pair.first->second = Vector.size();
+        size_t Num = Vector.size();
+        Pair.first->second = Num;
          Vector.push_back(InsertPair);
-        return std::make_pair(llvm::prior(Vector.end()), true);
+        return std::make_pair(Vector.begin() + Num, true);
        }
        return std::make_pair(Vector.begin() + Pair.first->second, false);
      }
  
-    const_iterator find(KeyT Key) const {
+    const_iterator find(const KeyT &Key) const {
        typename MapTy::const_iterator It = Map.find(Key);
        if (It == Map.end()) return Vector.end();
        return Vector.begin() + It->second;
@@ -121,7 +113,7 @@ namespace {
      /// from the vector, it just zeros out the key in the vector. This leaves
      /// iterators intact, but clients must be prepared for zeroed-out keys when
      /// iterating.
-    void blot(KeyT Key) {
+    void blot(const KeyT &Key) {
        typename MapTy::iterator It = Map.find(Key);
        if (It == Map.end()) return;
        Vector[It->second].first = KeyT();
@@ -139,6 +131,13 @@ namespace {
  // ARC Utilities.
  //===----------------------------------------------------------------------===//
  
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/StringSwitch.h"
+
  namespace {
    /// InstructionClass - A simple classification for instructions.
    enum InstructionClass {
@@ -160,6 +159,7 @@ namespace {
      IC_MoveWeak,            ///< objc_moveWeak (derived)
      IC_CopyWeak,            ///< objc_copyWeak (derived)
      IC_DestroyWeak,         ///< objc_destroyWeak (derived)
+    IC_StoreStrong,         ///< objc_storeStrong (derived)
      IC_CallOrUser,          ///< could call objc_release and/or "use" pointers
      IC_Call,                ///< could call objc_release
      IC_User,                ///< could "use" a pointer
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
          Arg->hasNestAttr() ||
          Arg->hasStructRetAttr())
        return false;
-  // Only consider values with pointer types, and not function pointers.
+  // Only consider values with pointer types.
+  // It seemes intuitive to exclude function pointer types as well, since
+  // functions are never reference-counted, however clang occasionally
+  // bitcasts reference-counted pointers to function-pointer type
+  // temporarily.
    PointerType *Ty = dyn_cast<PointerType>(Op->getType());
-  if (!Ty || isa<FunctionType>(Ty->getElementType()))
+  if (!Ty)
      return false;
    // Conservatively assume anything else is a potential use.
    return true;
@@ -256,6 +260,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
                return StringSwitch<InstructionClass>(F->getName())
                       .Case("objc_storeWeak",             IC_StoreWeak)
                       .Case("objc_initWeak",              IC_InitWeak)
+                     .Case("objc_storeStrong",           IC_StoreStrong)
                       .Default(IC_CallOrUser);
              // Second argument is i8**.
              if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
@@ -291,22 +296,23 @@ static InstructionClass GetInstructionClass(const Value *V) {
          // None of the intrinsic functions do objc_release. For intrinsics, the
          // only question is whether or not they may be users.
          switch (F->getIntrinsicID()) {
-        case 0: break;
-        case Intrinsic::bswap: case Intrinsic::ctpop:
-        case Intrinsic::ctlz: case Intrinsic::cttz:
          case Intrinsic::returnaddress: case Intrinsic::frameaddress:
          case Intrinsic::stacksave: case Intrinsic::stackrestore:
          case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+        case Intrinsic::objectsize: case Intrinsic::prefetch:
+        case Intrinsic::stackprotector:
+        case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
+        case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
+        case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
+        case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
+        case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+        case Intrinsic::invariant_start: case Intrinsic::invariant_end:
          // Don't let dbg info affect our results.
          case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
            // Short cut: Some intrinsics obviously don't use ObjC pointers.
            return IC_None;
          default:
-          for (Function::const_arg_iterator AI = F->arg_begin(),
-               AE = F->arg_end(); AI != AE; ++AI)
-            if (IsPotentialUse(AI))
-              return IC_User;
-          return IC_None;
+          break;
          }
        }
        return GetCallSiteClass(CI);
@@ -344,6 +350,10 @@ static InstructionClass GetInstructionClass(const Value *V) {
        break;
      default:
        // For anything else, check all the operands.
+      // Note that this includes both operands of a Store: while the first
+      // operand isn't actually being dereferenced, it is being stored to
+      // memory where we can no longer track who might read it and dereference
+      // it, so we have to consider it potentially used.
        for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
             OI != OE; ++OI)
          if (IsPotentialUse(*OI))
@@ -367,17 +377,17 @@ static InstructionClass GetBasicInstructionClass(const Value *V) {
    }
  
    // Otherwise, be conservative.
-  return IC_User;
+  return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
  }
  
-/// IsRetain - Test if the the given class is objc_retain or
+/// IsRetain - Test if the given class is objc_retain or
  /// equivalent.
  static bool IsRetain(InstructionClass Class) {
    return Class == IC_Retain ||
           Class == IC_RetainRV;
  }
  
-/// IsAutorelease - Test if the the given class is objc_autorelease or
+/// IsAutorelease - Test if the given class is objc_autorelease or
  /// equivalent.
  static bool IsAutorelease(InstructionClass Class) {
    return Class == IC_Autorelease ||
@@ -421,9 +431,10 @@ static bool IsAlwaysTail(InstructionClass Class) {
  /// IsNoThrow - Test if the given class represents instructions which are always
  /// safe to mark with the nounwind attribute..
  static bool IsNoThrow(InstructionClass Class) {
+  // objc_retainBlock is not nounwind because it calls user copy constructors
+  // which could theoretically throw.
    return Class == IC_Retain ||
           Class == IC_RetainRV ||
-         Class == IC_RetainBlock ||
           Class == IC_Release ||
           Class == IC_Autorelease ||
           Class == IC_AutoreleaseRV ||
@@ -431,7 +442,7 @@ static bool IsNoThrow(InstructionClass Class) {
           Class == IC_AutoreleasepoolPop;
  }
  
-/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// EraseInstruction - Erase the given instruction. Many ObjC calls return their
  /// argument verbatim, so if it's such a call and the return value has users,
  /// replace them with the argument value.
  static void EraseInstruction(Instruction *CI) {
@@ -515,6 +526,10 @@ static bool IsObjCIdentifiedObject(const Value *V) {
      const Value *Pointer =
        StripPointerCastsAndObjCCalls(LI->getPointerOperand());
      if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+      // A constant pointer can't be pointing to an object on the heap. It may
+      // be reference-counted, but it won't be deleted.
+      if (GV->isConstant())
+        return true;
        StringRef Name = GV->getName();
        // These special variables are known to hold values which are not
        // reference-counted pointers.
@@ -548,9 +563,8 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
      return Arg;
    }
  
-  // If we found an identifiable object but it has multiple uses, but they
-  // are trivial uses, we can still consider this to be a single-use
-  // value.
+  // If we found an identifiable object but it has multiple uses, but they are
+  // trivial uses, we can still consider this to be a single-use value.
    if (IsObjCIdentifiedObject(Arg)) {
      for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
           UI != UE; ++UI) {
@@ -588,6 +602,59 @@ static bool ModuleHasARC(const Module &M) {
      M.getNamedValue("objc_unretainedPointer");
  }
  
+/// DoesObjCBlockEscape - Test whether the given pointer, which is an
+/// Objective C block pointer, does not "escape". This differs from regular
+/// escape analysis in that a use as an argument to a call is not considered
+/// an escape.
+static bool DoesObjCBlockEscape(const Value *BlockPtr) {
+  // Walk the def-use chains.
+  SmallVector<const Value *, 4> Worklist;
+  Worklist.push_back(BlockPtr);
+  do {
+    const Value *V = Worklist.pop_back_val();
+    for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+         UI != UE; ++UI) {
+      const User *UUser = *UI;
+      // Special - Use by a call (callee or argument) is not considered
+      // to be an escape.
+      switch (GetBasicInstructionClass(UUser)) {
+      case IC_StoreWeak:
+      case IC_InitWeak:
+      case IC_StoreStrong:
+      case IC_Autorelease:
+      case IC_AutoreleaseRV:
+        // These special functions make copies of their pointer arguments.
+        return true;
+      case IC_User:
+      case IC_None:
+        // Use by an instruction which copies the value is an escape if the
+        // result is an escape.
+        if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+            isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+          Worklist.push_back(UUser);
+          continue;
+        }
+        // Use by a load is not an escape.
+        if (isa<LoadInst>(UUser))
+          continue;
+        // Use by a store is not an escape if the use is the address.
+        if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+          if (V != SI->getValueOperand())
+            continue;
+        break;
+      default:
+        // Regular calls and other stuff are not considered escapes.
+        continue;
+      }
+      // Otherwise, conservatively assume an escape.
+      return true;
+    }
+  } while (!Worklist.empty());
+
+  // No escapes found.
+  return false;
+}
+
  //===----------------------------------------------------------------------===//
  // ARC AliasAnalysis.
  //===----------------------------------------------------------------------===//
@@ -622,7 +689,7 @@ namespace {
      /// specified pass info.
      virtual void *getAdjustedAnalysisPointer(const void *PI) {
        if (PI == &AliasAnalysis::ID)
-        return (AliasAnalysis*)this;
+        return static_cast<AliasAnalysis *>(this);
        return this;
      }
  
@@ -738,7 +805,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
    switch (GetBasicInstructionClass(CS.getInstruction())) {
    case IC_Retain:
    case IC_RetainRV:
-  case IC_RetainBlock:
    case IC_Autorelease:
    case IC_AutoreleaseRV:
    case IC_NoopCast:
@@ -746,6 +812,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
    case IC_FusedRetainAutorelease:
    case IC_FusedRetainAutoreleaseRV:
      // These functions don't access any memory visible to the compiler.
+    // Note that this doesn't include objc_retainBlock, because it updates
+    // pointers when it copies block data.
      return NoModRef;
    default:
      break;
@@ -827,7 +895,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
        // These calls return their argument verbatim, as a low-level
        // optimization. However, this makes high-level optimizations
        // harder. Undo any uses of this optimization that the front-end
-      // emitted here. We'll redo them in a later pass.
+      // emitted here. We'll redo them in the contract pass.
        Changed = true;
        Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
        break;
@@ -839,6 +907,149 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
    return Changed;
  }
  
+//===----------------------------------------------------------------------===//
+// ARC autorelease pool elimination.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace {
+  /// ObjCARCAPElim - Autorelease pool elimination.
+  class ObjCARCAPElim : public ModulePass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool runOnModule(Module &M);
+
+    static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
+    static bool OptimizeBB(BasicBlock *BB);
+
+  public:
+    static char ID;
+    ObjCARCAPElim() : ModulePass(ID) {
+      initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim,
+                "objc-arc-apelim",
+                "ObjC ARC autorelease pool elimination",
+                false, false)
+
+Pass *llvm::createObjCARCAPElimPass() {
+  return new ObjCARCAPElim();
+}
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+}
+
+/// MayAutorelease - Interprocedurally determine if calls made by the
+/// given call site can possibly produce autoreleases.
+bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
+  if (const Function *Callee = CS.getCalledFunction()) {
+    if (Callee->isDeclaration() || Callee->mayBeOverridden())
+      return true;
+    for (Function::const_iterator I = Callee->begin(), E = Callee->end();
+         I != E; ++I) {
+      const BasicBlock *BB = I;
+      for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
+           J != F; ++J)
+        if (ImmutableCallSite JCS = ImmutableCallSite(J))
+          // This recursion depth limit is arbitrary. It's just great
+          // enough to cover known interesting testcases.
+          if (Depth < 3 &&
+              !JCS.onlyReadsMemory() &&
+              MayAutorelease(JCS, Depth + 1))
+            return true;
+    }
+    return false;
+  }
+
+  return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+  bool Changed = false;
+
+  Instruction *Push = 0;
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+    Instruction *Inst = I++;
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_AutoreleasepoolPush:
+      Push = Inst;
+      break;
+    case IC_AutoreleasepoolPop:
+      // If this pop matches a push and nothing in between can autorelease,
+      // zap the pair.
+      if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+        Changed = true;
+        Inst->eraseFromParent();
+        Push->eraseFromParent();
+      }
+      Push = 0;
+      break;
+    case IC_CallOrUser:
+      if (MayAutorelease(ImmutableCallSite(Inst)))
+        Push = 0;
+      break;
+    default:
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!ModuleHasARC(M))
+    return false;
+
+  // Find the llvm.global_ctors variable, as the first step in
+  // identifying the global constructors. In theory, unnecessary autorelease
+  // pools could occur anywhere, but in practice it's pretty rare. Global
+  // ctors are a place where autorelease pools get inserted automatically,
+  // so it's pretty common for them to be unnecessary, and it's pretty
+  // profitable to eliminate them.
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+  if (!GV)
+    return false;
+
+  assert(GV->hasDefinitiveInitializer() &&
+         "llvm.global_ctors is uncooperative!");
+
+  bool Changed = false;
+
+  // Dig the constructor functions out of GV's initializer.
+  ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+  for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+       OI != OE; ++OI) {
+    Value *Op = *OI;
+    // llvm.global_ctors is an array of pairs where the second members
+    // are constructor functions.
+    Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+    // If the user used a constructor function with the wrong signature and
+    // it got bitcasted or whatever, look the other way.
+    if (!F)
+      continue;
+    // Only look at function definitions.
+    if (F->isDeclaration())
+      continue;
+    // Only look at functions with one basic block.
+    if (llvm::next(F->begin()) != F->end())
+      continue;
+    // Ok, a single-block constructor function definition. Try to optimize it.
+    Changed |= OptimizeBB(F->begin());
+  }
+
+  return Changed;
+}
+
  //===----------------------------------------------------------------------===//
  // ARC optimization.
  //===----------------------------------------------------------------------===//
@@ -877,15 +1088,14 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
  // usually can't sink them past other calls, which would be the main
  // case where it would be useful.
  
-/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
  
-#include "llvm/GlobalAlias.h"
-#include "llvm/Constants.h"
  #include "llvm/LLVMContext.h"
-#include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
  #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
  
  STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
  STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -933,22 +1143,13 @@ bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
    // If the values are Selects with the same condition, we can do a more precise
    // check: just check for relations between the values on corresponding arms.
    if (const SelectInst *SB = dyn_cast<SelectInst>(B))
-    if (A->getCondition() == SB->getCondition()) {
-      if (related(A->getTrueValue(), SB->getTrueValue()))
-        return true;
-      if (related(A->getFalseValue(), SB->getFalseValue()))
-        return true;
-      return false;
-    }
+    if (A->getCondition() == SB->getCondition())
+      return related(A->getTrueValue(), SB->getTrueValue()) ||
+             related(A->getFalseValue(), SB->getFalseValue());
  
    // Check both arms of the Select node individually.
-  if (related(A->getTrueValue(), B))
-    return true;
-  if (related(A->getFalseValue(), B))
-    return true;
-
-  // The arms both checked out.
-  return false;
+  return related(A->getTrueValue(), B) ||
+         related(A->getFalseValue(), B);
  }
  
  bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
@@ -1124,13 +1325,19 @@ namespace {
    /// retain-decrement-use-release sequence or release-use-decrement-retain
    /// reverese sequence.
    struct RRInfo {
-    /// KnownIncremented - After an objc_retain, the reference count of the
-    /// referenced object is known to be positive. Similarly, before an
-    /// objc_release, the reference count of the referenced object is known to
-    /// be positive. If there are retain-release pairs in code regions where the
-    /// retain count is known to be positive, they can be eliminated, regardless
-    /// of any side effects between them.
-    bool KnownIncremented;
+    /// KnownSafe - After an objc_retain, the reference count of the referenced
+    /// object is known to be positive. Similarly, before an objc_release, the
+    /// reference count of the referenced object is known to be positive. If
+    /// there are retain-release pairs in code regions where the retain count
+    /// is known to be positive, they can be eliminated, regardless of any side
+    /// effects between them.
+    ///
+    /// Also, a retain+release pair nested within another retain+release
+    /// pair all on the known same pointer value can be eliminated, regardless
+    /// of any intervening side effects.
+    ///
+    /// KnownSafe is true when either of these conditions is satisfied.
+    bool KnownSafe;
  
      /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
      /// opposed to objc_retain calls).
@@ -1153,7 +1360,8 @@ namespace {
      SmallPtrSet<Instruction *, 2> ReverseInsertPts;
  
      RRInfo() :
-      KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+      KnownSafe(false), IsRetainBlock(false),
+      IsTailCallRelease(false),
        ReleaseMetadata(0) {}
  
      void clear();
@@ -1161,7 +1369,7 @@ namespace {
  }
  
  void RRInfo::clear() {
-  KnownIncremented = false;
+  KnownSafe = false;
    IsRetainBlock = false;
    IsTailCallRelease = false;
    ReleaseMetadata = 0;
@@ -1173,47 +1381,55 @@ namespace {
    /// PtrState - This class summarizes several per-pointer runtime properties
    /// which are propogated through the flow graph.
    class PtrState {
-    /// RefCount - The known minimum number of reference count increments.
-    unsigned RefCount;
+    /// NestCount - The known minimum level of retain+release nesting.
+    unsigned NestCount;
+
+    /// KnownPositiveRefCount - True if the reference count is known to
+    /// be incremented.
+    bool KnownPositiveRefCount;
+
+    /// Partial - True of we've seen an opportunity for partial RR elimination,
+    /// such as pushing calls into a CFG triangle or into one side of a
+    /// CFG diamond.
+    bool Partial;
  
      /// Seq - The current position in the sequence.
-    Sequence Seq;
+    Sequence Seq : 8;
  
    public:
      /// RRI - Unidirectional information about the current sequence.
      /// TODO: Encapsulate this better.
      RRInfo RRI;
  
-    PtrState() : RefCount(0), Seq(S_None) {}
+    PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+                 Seq(S_None) {}
  
-    void SetAtLeastOneRefCount()  {
-      if (RefCount == 0) RefCount = 1;
+    void SetKnownPositiveRefCount() {
+      KnownPositiveRefCount = true;
      }
  
-    void IncrementRefCount() {
-      if (RefCount != UINT_MAX) ++RefCount;
+    void ClearRefCount() {
+      KnownPositiveRefCount = false;
      }
  
-    void DecrementRefCount() {
-      if (RefCount != 0) --RefCount;
+    bool IsKnownIncremented() const {
+      return KnownPositiveRefCount;
      }
  
-    bool IsKnownIncremented() const {
-      return RefCount > 0;
+    void IncrementNestCount() {
+      if (NestCount != UINT_MAX) ++NestCount;
      }
  
-    void SetSeq(Sequence NewSeq) {
-      Seq = NewSeq;
+    void DecrementNestCount() {
+      if (NestCount != 0) --NestCount;
      }
  
-    void SetSeqToRelease(MDNode *M) {
-      if (Seq == S_None || Seq == S_Use) {
-        Seq = M ? S_MovableRelease : S_Release;
-        RRI.ReleaseMetadata = M;
-      } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
-        Seq = S_Release;
-        RRI.ReleaseMetadata = 0;
-      }
+    bool IsKnownNested() const {
+      return NestCount > 0;
+    }
+
+    void SetSeq(Sequence NewSeq) {
+      Seq = NewSeq;
      }
  
      Sequence GetSeq() const {
@@ -1221,7 +1437,12 @@ namespace {
      }
  
      void ClearSequenceProgress() {
-      Seq = S_None;
+      ResetSequenceProgress(S_None);
+    }
+
+    void ResetSequenceProgress(Sequence NewSeq) {
+      Seq = NewSeq;
+      Partial = false;
        RRI.clear();
      }
  
@@ -1232,24 +1453,40 @@ namespace {
  void
  PtrState::Merge(const PtrState &Other, bool TopDown) {
    Seq = MergeSeqs(Seq, Other.Seq, TopDown);
-  RefCount = std::min(RefCount, Other.RefCount);
+  KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+  NestCount = std::min(NestCount, Other.NestCount);
  
    // We can't merge a plain objc_retain with an objc_retainBlock.
    if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
      Seq = S_None;
  
+  // If we're not in a sequence (anymore), drop all associated state.
    if (Seq == S_None) {
+    Partial = false;
      RRI.clear();
+  } else if (Partial || Other.Partial) {
+    // If we're doing a merge on a path that's previously seen a partial
+    // merge, conservatively drop the sequence, to avoid doing partial
+    // RR elimination. If the branch predicates for the two merge differ,
+    // mixing them is unsafe.
+    ClearSequenceProgress();
    } else {
      // Conservatively merge the ReleaseMetadata information.
      if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
        RRI.ReleaseMetadata = 0;
  
-    RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
-    RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+    RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
+    RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+                            Other.RRI.IsTailCallRelease;
      RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
-    RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
-                                Other.RRI.ReverseInsertPts.end());
+
+    // Merge the insert point sets. If there are any differences,
+    // that makes this a partial merge.
+    Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
+    for (SmallPtrSet<Instruction *, 2>::const_iterator
+         I = Other.RRI.ReverseInsertPts.begin(),
+         E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+      Partial |= RRI.ReverseInsertPts.insert(*I);
    }
  }
  
@@ -1275,6 +1512,11 @@ namespace {
      /// known about a pointer at the top of each block.
      MapTy PerPtrBottomUp;
  
+    /// Preds, Succs - Effective successors and predecessors of the current
+    /// block (this ignores ignorable edges and ignored backedges).
+    SmallVector<BasicBlock *, 2> Preds;
+    SmallVector<BasicBlock *, 2> Succs;
+
    public:
      BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
  
@@ -1332,14 +1574,22 @@ namespace {
      /// entry to an exit which pass through this block. This is only valid
      /// after both the top-down and bottom-up traversals are complete.
      unsigned GetAllPathCount() const {
+      assert(TopDownPathCount != 0);
+      assert(BottomUpPathCount != 0);
        return TopDownPathCount * BottomUpPathCount;
      }
  
-    /// IsVisitedTopDown - Test whether the block for this BBState has been
-    /// visited by the top-down portion of the algorithm.
-    bool isVisitedTopDown() const {
-      return TopDownPathCount != 0;
-    }
+    // Specialized CFG utilities.
+    typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
+    edge_iterator pred_begin() { return Preds.begin(); }
+    edge_iterator pred_end() { return Preds.end(); }
+    edge_iterator succ_begin() { return Succs.begin(); }
+    edge_iterator succ_end() { return Succs.end(); }
+
+    void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
+    void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
+
+    bool isExit() const { return Succs.empty(); }
    };
  }
  
@@ -1426,6 +1676,14 @@ namespace {
      /// metadata.
      unsigned ImpreciseReleaseMDKind;
  
+    /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
+    /// metadata.
+    unsigned CopyOnEscapeMDKind;
+
+    /// NoObjCARCExceptionsMDKind - The Metadata Kind for
+    /// clang.arc.no_objc_arc_exceptions metadata.
+    unsigned NoObjCARCExceptionsMDKind;
+
      Constant *getRetainRVCallee(Module *M);
      Constant *getAutoreleaseRVCallee(Module *M);
      Constant *getReleaseCallee(Module *M);
@@ -1433,6 +1691,8 @@ namespace {
      Constant *getRetainBlockCallee(Module *M);
      Constant *getAutoreleaseCallee(Module *M);
  
+    bool IsRetainBlockOptimizable(const Instruction *Inst);
+
      void OptimizeRetainCall(Function &F, Instruction *Retain);
      bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
      void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
@@ -1441,9 +1701,16 @@ namespace {
      void CheckForCFGHazards(const BasicBlock *BB,
                              DenseMap<const BasicBlock *, BBState> &BBStates,
                              BBState &MyStates) const;
+    bool VisitInstructionBottomUp(Instruction *Inst,
+                                  BasicBlock *BB,
+                                  MapVector<Value *, RRInfo> &Retains,
+                                  BBState &MyStates);
      bool VisitBottomUp(BasicBlock *BB,
                         DenseMap<const BasicBlock *, BBState> &BBStates,
                         MapVector<Value *, RRInfo> &Retains);
+    bool VisitInstructionTopDown(Instruction *Inst,
+                                 DenseMap<Value *, RRInfo> &Releases,
+                                 BBState &MyStates);
      bool VisitTopDown(BasicBlock *BB,
                        DenseMap<const BasicBlock *, BBState> &BBStates,
                        DenseMap<Value *, RRInfo> &Releases);
@@ -1500,16 +1767,29 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
  }
  
+bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
+  // Without the magic metadata tag, we have to assume this might be an
+  // objc_retainBlock call inserted to convert a block pointer to an id,
+  // in which case it really is needed.
+  if (!Inst->getMetadata(CopyOnEscapeMDKind))
+    return false;
+
+  // If the pointer "escapes" (not including being used in a call),
+  // the copy may be needed.
+  if (DoesObjCBlockEscape(Inst))
+    return false;
+
+  // Otherwise, it's not needed.
+  return true;
+}
+
  Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
    if (!RetainRVCallee) {
      LLVMContext &C = M->getContext();
      Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<Type *> Params;
-    Params.push_back(I8X);
-    FunctionType *FTy =
-      FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      RetainRVCallee =
        M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
                               Attributes);
@@ -1521,12 +1801,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
    if (!AutoreleaseRVCallee) {
      LLVMContext &C = M->getContext();
      Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<Type *> Params;
-    Params.push_back(I8X);
-    FunctionType *FTy =
-      FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      AutoreleaseRVCallee =
        M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
                               Attributes);
@@ -1537,10 +1814,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
  Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
    if (!ReleaseCallee) {
      LLVMContext &C = M->getContext();
-    std::vector<Type *> Params;
-    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      ReleaseCallee =
        M->getOrInsertFunction(
          "objc_release",
@@ -1553,10 +1828,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
  Constant *ObjCARCOpt::getRetainCallee(Module *M) {
    if (!RetainCallee) {
      LLVMContext &C = M->getContext();
-    std::vector<Type *> Params;
-    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      RetainCallee =
        M->getOrInsertFunction(
          "objc_retain",
@@ -1569,15 +1842,14 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
  Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
    if (!RetainBlockCallee) {
      LLVMContext &C = M->getContext();
-    std::vector<Type *> Params;
-    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    // objc_retainBlock is not nounwind because it calls user copy constructors
+    // which could theoretically throw.
      RetainBlockCallee =
        M->getOrInsertFunction(
          "objc_retainBlock",
          FunctionType::get(Params[0], Params, /*isVarArg=*/false),
-        Attributes);
+        AttrListPtr());
    }
    return RetainBlockCallee;
  }
@@ -1585,10 +1857,8 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
  Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
    if (!AutoreleaseCallee) {
      LLVMContext &C = M->getContext();
-    std::vector<Type *> Params;
-    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      AutoreleaseCallee =
        M->getOrInsertFunction(
          "objc_autorelease",
@@ -1702,6 +1972,7 @@ namespace {
    /// use here.
    enum DependenceKind {
      NeedsPositiveRetainCount,
+    AutoreleasePoolBoundary,
      CanChangeRetainCount,
      RetainAutoreleaseDep,       ///< Blocks objc_retainAutorelease.
      RetainAutoreleaseRVDep,     ///< Blocks objc_retainAutoreleaseReturnValue.
@@ -1731,6 +2002,19 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
      }
    }
  
+  case AutoreleasePoolBoundary: {
+    InstructionClass Class = GetInstructionClass(Inst);
+    switch (Class) {
+    case IC_AutoreleasepoolPop:
+    case IC_AutoreleasepoolPush:
+      // These mark the end and begin of an autorelease pool scope.
+      return true;
+    default:
+      // Nothing else does this.
+      return false;
+    }
+  }
+
    case CanChangeRetainCount: {
      InstructionClass Class = GetInstructionClass(Inst);
      switch (Class) {
@@ -1748,6 +2032,7 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
    case RetainAutoreleaseDep:
      switch (GetBasicInstructionClass(Inst)) {
      case IC_AutoreleasepoolPop:
+    case IC_AutoreleasepoolPush:
        // Don't merge an objc_autorelease with an objc_retain inside a different
        // autoreleasepool scope.
        return true;
@@ -1759,7 +2044,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
        // Nothing else matters for objc_retainAutorelease formation.
        return false;
      }
-    break;
  
    case RetainAutoreleaseRVDep: {
      InstructionClass Class = GetBasicInstructionClass(Inst);
@@ -1773,7 +2057,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
        // retainAutoreleaseReturnValue formation.
        return CanInterruptRV(Class);
      }
-    break;
    }
  
    case RetainRVDep:
@@ -1781,7 +2064,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
    }
  
    llvm_unreachable("Invalid dependence flavor");
-  return true;
  }
  
  /// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
@@ -1861,13 +2143,13 @@ static bool isNoopInstruction(const Instruction *I) {
  /// objc_retainAutoreleasedReturnValue if the operand is a return value.
  void
  ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
-  CallSite CS(GetObjCArg(Retain));
-  Instruction *Call = CS.getInstruction();
+  ImmutableCallSite CS(GetObjCArg(Retain));
+  const Instruction *Call = CS.getInstruction();
    if (!Call) return;
    if (Call->getParent() != Retain->getParent()) return;
  
    // Check that the call is next to the retain.
-  BasicBlock::iterator I = Call;
+  BasicBlock::const_iterator I = Call;
    ++I;
    while (isNoopInstruction(I)) ++I;
    if (&*I != Retain)
@@ -1880,22 +2162,30 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
  }
  
  /// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
-/// objc_retain if the operand is not a return value.  Or, if it can be
-/// paired with an objc_autoreleaseReturnValue, delete the pair and
-/// return true.
+/// objc_retain if the operand is not a return value.  Or, if it can be paired
+/// with an objc_autoreleaseReturnValue, delete the pair and return true.
  bool
  ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
-  // Check for the argument being from an immediately preceding call.
-  Value *Arg = GetObjCArg(RetainRV);
-  CallSite CS(Arg);
-  if (Instruction *Call = CS.getInstruction())
+  // Check for the argument being from an immediately preceding call or invoke.
+  const Value *Arg = GetObjCArg(RetainRV);
+  ImmutableCallSite CS(Arg);
+  if (const Instruction *Call = CS.getInstruction()) {
      if (Call->getParent() == RetainRV->getParent()) {
-      BasicBlock::iterator I = Call;
+      BasicBlock::const_iterator I = Call;
        ++I;
        while (isNoopInstruction(I)) ++I;
        if (&*I == RetainRV)
          return false;
+    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+      BasicBlock *RetainRVParent = RetainRV->getParent();
+      if (II->getNormalDest() == RetainRVParent) {
+        BasicBlock::const_iterator I = RetainRVParent->begin();
+        while (isNoopInstruction(I)) ++I;
+        if (&*I == RetainRV)
+          return false;
+      }
      }
+  }
  
    // Check for being preceded by an objc_autoreleaseReturnValue on the same
    // pointer. In this case, we can delete the pair.
@@ -1981,6 +2271,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
      case IC_DestroyWeak: {
        CallInst *CI = cast<CallInst>(Inst);
        if (isNullOrUndef(CI->getArgOperand(0))) {
+        Changed = true;
          Type *Ty = CI->getArgOperand(0)->getType();
          new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                        Constant::getNullValue(Ty),
@@ -1996,6 +2287,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
        CallInst *CI = cast<CallInst>(Inst);
        if (isNullOrUndef(CI->getArgOperand(0)) ||
            isNullOrUndef(CI->getArgOperand(1))) {
+        Changed = true;
          Type *Ty = CI->getArgOperand(0)->getType();
          new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                        Constant::getNullValue(Ty),
@@ -2109,9 +2401,35 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
  
          // Check that there is nothing that cares about the reference
          // count between the call and the phi.
-        FindDependencies(NeedsPositiveRetainCount, Arg,
-                         Inst->getParent(), Inst,
-                         DependingInstructions, Visited, PA);
+        switch (Class) {
+        case IC_Retain:
+        case IC_RetainBlock:
+          // These can always be moved up.
+          break;
+        case IC_Release:
+          // These can't be moved across things that care about the retain
+          // count.
+          FindDependencies(NeedsPositiveRetainCount, Arg,
+                           Inst->getParent(), Inst,
+                           DependingInstructions, Visited, PA);
+          break;
+        case IC_Autorelease:
+          // These can't be moved across autorelease pool scope boundaries.
+          FindDependencies(AutoreleasePoolBoundary, Arg,
+                           Inst->getParent(), Inst,
+                           DependingInstructions, Visited, PA);
+          break;
+        case IC_RetainRV:
+        case IC_AutoreleaseRV:
+          // Don't move these; the RV optimization depends on the autoreleaseRV
+          // being tail called, and the retainRV being immediately after a call
+          // (which might still happen if we get lucky with codegen layout, but
+          // it's not worth taking the chance).
+          continue;
+        default:
+          llvm_unreachable("Invalid dependence flavor");
+        }
+
          if (DependingInstructions.size() == 1 &&
              *DependingInstructions.begin() == PN) {
            Changed = true;
@@ -2151,7 +2469,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
                                 BBState &MyStates) const {
    // If any top-down local-use or possible-dec has a succ which is earlier in
    // the sequence, forget it.
-  for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+  for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
         E = MyStates.top_down_ptr_end(); I != E; ++I)
      switch (I->second.GetSeq()) {
      default: break;
@@ -2160,14 +2478,33 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
        const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
        bool SomeSuccHasSame = false;
        bool AllSuccsHaveSame = true;
-      PtrState &S = MyStates.getPtrTopDownState(Arg);
-      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
-        PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
-        switch (SuccS.GetSeq()) {
+      PtrState &S = I->second;
+      succ_const_iterator SI(TI), SE(TI, false);
+
+      // If the terminator is an invoke marked with the
+      // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+      // ignored, for ARC purposes.
+      if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+        --SE;
+
+      for (; SI != SE; ++SI) {
+        Sequence SuccSSeq = S_None;
+        bool SuccSRRIKnownSafe = false;
+        // If VisitBottomUp has pointer information for this successor, take
+        // what we know about it.
+        DenseMap<const BasicBlock *, BBState>::iterator BBI =
+          BBStates.find(*SI);
+        assert(BBI != BBStates.end());
+        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+        SuccSSeq = SuccS.GetSeq();
+        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+        switch (SuccSSeq) {
          case S_None:
          case S_CanRelease: {
-          if (!S.RRI.KnownIncremented && !SuccS.RRI.KnownIncremented)
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
              S.ClearSequenceProgress();
+            break;
+          }
            continue;
          }
          case S_Use:
@@ -2176,7 +2513,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
          case S_Stop:
          case S_Release:
          case S_MovableRelease:
-          if (!S.RRI.KnownIncremented && !SuccS.RRI.KnownIncremented)
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
              AllSuccsHaveSame = false;
            break;
          case S_Retain:
@@ -2188,19 +2525,39 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
        // guards against loops in the middle of a sequence.
        if (SomeSuccHasSame && !AllSuccsHaveSame)
          S.ClearSequenceProgress();
+      break;
      }
      case S_CanRelease: {
        const Value *Arg = I->first;
        const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
        bool SomeSuccHasSame = false;
        bool AllSuccsHaveSame = true;
-      PtrState &S = MyStates.getPtrTopDownState(Arg);
-      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
-        PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
-        switch (SuccS.GetSeq()) {
+      PtrState &S = I->second;
+      succ_const_iterator SI(TI), SE(TI, false);
+
+      // If the terminator is an invoke marked with the
+      // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+      // ignored, for ARC purposes.
+      if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+        --SE;
+
+      for (; SI != SE; ++SI) {
+        Sequence SuccSSeq = S_None;
+        bool SuccSRRIKnownSafe = false;
+        // If VisitBottomUp has pointer information for this successor, take
+        // what we know about it.
+        DenseMap<const BasicBlock *, BBState>::iterator BBI =
+          BBStates.find(*SI);
+        assert(BBI != BBStates.end());
+        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+        SuccSSeq = SuccS.GetSeq();
+        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+        switch (SuccSSeq) {
          case S_None: {
-          if (!S.RRI.KnownIncremented && !SuccS.RRI.KnownIncremented)
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
              S.ClearSequenceProgress();
+            break;
+          }
            continue;
          }
          case S_CanRelease:
@@ -2210,7 +2567,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
          case S_Release:
          case S_MovableRelease:
          case S_Use:
-          if (!S.RRI.KnownIncremented && !SuccS.RRI.KnownIncremented)
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
              AllSuccsHaveSame = false;
            break;
          case S_Retain:
@@ -2222,10 +2579,165 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
        // guards against loops in the middle of a sequence.
        if (SomeSuccHasSame && !AllSuccsHaveSame)
          S.ClearSequenceProgress();
+      break;
      }
      }
  }
  
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+                                     BasicBlock *BB,
+                                     MapVector<Value *, RRInfo> &Retains,
+                                     BBState &MyStates) {
+  bool NestingDetected = false;
+  InstructionClass Class = GetInstructionClass(Inst);
+  const Value *Arg = 0;
+
+  switch (Class) {
+  case IC_Release: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+    // If we see two releases in a row on the same pointer. If so, make
+    // a note, and we'll cicle back to revisit it after we've
+    // hopefully eliminated the second release, which may allow us to
+    // eliminate the first release too.
+    // Theoretically we could implement removal of nested retain+release
+    // pairs by making PtrState hold a stack of states, but this is
+    // simple and avoids adding overhead for the non-nested case.
+    if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+      NestingDetected = true;
+
+    MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+    S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+    S.RRI.ReleaseMetadata = ReleaseMetadata;
+    S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+    S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+    S.RRI.Calls.insert(Inst);
+
+    S.IncrementNestCount();
+    break;
+  }
+  case IC_RetainBlock:
+    // An objc_retainBlock call with just a use may need to be kept,
+    // because it may be copying a block from the stack to the heap.
+    if (!IsRetainBlockOptimizable(Inst))
+      break;
+    // FALLTHROUGH
+  case IC_Retain:
+  case IC_RetainRV: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrBottomUpState(Arg);
+    S.SetKnownPositiveRefCount();
+    S.DecrementNestCount();
+
+    switch (S.GetSeq()) {
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+    case S_Use:
+      S.RRI.ReverseInsertPts.clear();
+      // FALL THROUGH
+    case S_CanRelease:
+      // Don't do retain+release tracking for IC_RetainRV, because it's
+      // better to let it remain as the first instruction after a call.
+      if (Class != IC_RetainRV) {
+        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+        Retains[Inst] = S.RRI;
+      }
+      S.ClearSequenceProgress();
+      break;
+    case S_None:
+      break;
+    case S_Retain:
+      llvm_unreachable("bottom-up pointer in retain state!");
+    }
+    return NestingDetected;
+  }
+  case IC_AutoreleasepoolPop:
+    // Conservatively, clear MyStates for all known pointers.
+    MyStates.clearBottomUpPointers();
+    return NestingDetected;
+  case IC_AutoreleasepoolPush:
+  case IC_None:
+    // These are irrelevant.
+    return NestingDetected;
+  default:
+    break;
+  }
+
+  // Consider any other possible effects of this instruction on each
+  // pointer being tracked.
+  for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+       ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+    const Value *Ptr = MI->first;
+    if (Ptr == Arg)
+      continue; // Handled above.
+    PtrState &S = MI->second;
+    Sequence Seq = S.GetSeq();
+
+    // Check for possible releases.
+    if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      S.ClearRefCount();
+      switch (Seq) {
+      case S_Use:
+        S.SetSeq(S_CanRelease);
+        continue;
+      case S_CanRelease:
+      case S_Release:
+      case S_MovableRelease:
+      case S_Stop:
+      case S_None:
+        break;
+      case S_Retain:
+        llvm_unreachable("bottom-up pointer in retain state!");
+      }
+    }
+
+    // Check for possible direct uses.
+    switch (Seq) {
+    case S_Release:
+    case S_MovableRelease:
+      if (CanUse(Inst, Ptr, PA, Class)) {
+        assert(S.RRI.ReverseInsertPts.empty());
+        // If this is an invoke instruction, we're scanning it as part of
+        // one of its successor blocks, since we can't insert code after it
+        // in its own block, and we don't want to split critical edges.
+        if (isa<InvokeInst>(Inst))
+          S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+        else
+          S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+        S.SetSeq(S_Use);
+      } else if (Seq == S_Release &&
+                 (Class == IC_User || Class == IC_CallOrUser)) {
+        // Non-movable releases depend on any possible objc pointer use.
+        S.SetSeq(S_Stop);
+        assert(S.RRI.ReverseInsertPts.empty());
+        // As above; handle invoke specially.
+        if (isa<InvokeInst>(Inst))
+          S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+        else
+          S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+      }
+      break;
+    case S_Stop:
+      if (CanUse(Inst, Ptr, PA, Class))
+        S.SetSeq(S_Use);
+      break;
+    case S_CanRelease:
+    case S_Use:
+    case S_None:
+      break;
+    case S_Retain:
+      llvm_unreachable("bottom-up pointer in retain state!");
+    }
+  }
+
+  return NestingDetected;
+}
+
  bool
  ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
                            DenseMap<const BasicBlock *, BBState> &BBStates,
@@ -2235,164 +2747,177 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
  
    // Merge the states from each successor to compute the initial state
    // for the current block.
-  const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
-  succ_const_iterator SI(TI), SE(TI, false);
-  if (SI == SE)
-    MyStates.SetAsExit();
-  else
-    do {
-      const BasicBlock *Succ = *SI++;
-      if (Succ == BB)
-        continue;
-      DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
-      // If we haven't seen this node yet, then we've found a CFG cycle.
-      // Be optimistic here; it's CheckForCFGHazards' job detect trouble.
-      if (I == BBStates.end())
-        continue;
-      MyStates.InitFromSucc(I->second);
-      while (SI != SE) {
-        Succ = *SI++;
-        if (Succ != BB) {
-          I = BBStates.find(Succ);
-          if (I != BBStates.end())
-            MyStates.MergeSucc(I->second);
-        }
-      }
-      break;
-    } while (SI != SE);
+  for (BBState::edge_iterator SI(MyStates.succ_begin()),
+       SE(MyStates.succ_end()); SI != SE; ++SI) {
+    const BasicBlock *Succ = *SI;
+    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+    assert(I != BBStates.end());
+    MyStates.InitFromSucc(I->second);
+    ++SI;
+    for (; SI != SE; ++SI) {
+      Succ = *SI;
+      I = BBStates.find(Succ);
+      assert(I != BBStates.end());
+      MyStates.MergeSucc(I->second);
+    }
+    break;
+  }
  
    // Visit all the instructions, bottom-up.
    for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
      Instruction *Inst = llvm::prior(I);
-    InstructionClass Class = GetInstructionClass(Inst);
-    const Value *Arg = 0;
  
-    switch (Class) {
-    case IC_Release: {
-      Arg = GetObjCArg(Inst);
+    // Invoke instructions are visited as part of their successors (below).
+    if (isa<InvokeInst>(Inst))
+      continue;
+
+    NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+  }
+
+  // If there's a predecessor with an invoke, visit the invoke as if it were
+  // part of this block, since we can't insert code after an invoke in its own
+  // block, and we don't want to split critical edges.
+  for (BBState::edge_iterator PI(MyStates.pred_begin()),
+       PE(MyStates.pred_end()); PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
+      NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+                                    DenseMap<Value *, RRInfo> &Releases,
+                                    BBState &MyStates) {
+  bool NestingDetected = false;
+  InstructionClass Class = GetInstructionClass(Inst);
+  const Value *Arg = 0;
+
+  switch (Class) {
+  case IC_RetainBlock:
+    // An objc_retainBlock call with just a use may need to be kept,
+    // because it may be copying a block from the stack to the heap.
+    if (!IsRetainBlockOptimizable(Inst))
+      break;
+    // FALLTHROUGH
+  case IC_Retain:
+  case IC_RetainRV: {
+    Arg = GetObjCArg(Inst);
  
-      PtrState &S = MyStates.getPtrBottomUpState(Arg);
+    PtrState &S = MyStates.getPtrTopDownState(Arg);
  
-      // If we see two releases in a row on the same pointer. If so, make
+    // Don't do retain+release tracking for IC_RetainRV, because it's
+    // better to let it remain as the first instruction after a call.
+    if (Class != IC_RetainRV) {
+      // If we see two retains in a row on the same pointer. If so, make
        // a note, and we'll cicle back to revisit it after we've
-      // hopefully eliminated the second release, which may allow us to
-      // eliminate the first release too.
+      // hopefully eliminated the second retain, which may allow us to
+      // eliminate the first retain too.
        // Theoretically we could implement removal of nested retain+release
        // pairs by making PtrState hold a stack of states, but this is
        // simple and avoids adding overhead for the non-nested case.
-      if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+      if (S.GetSeq() == S_Retain)
          NestingDetected = true;
  
-      S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
-      S.RRI.clear();
-      S.RRI.KnownIncremented = S.IsKnownIncremented();
-      S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+      S.ResetSequenceProgress(S_Retain);
+      S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+      // Don't check S.IsKnownIncremented() here because it's not sufficient.
+      S.RRI.KnownSafe = S.IsKnownNested();
        S.RRI.Calls.insert(Inst);
+    }
  
-      S.IncrementRefCount();
+    S.IncrementNestCount();
+    return NestingDetected;
+  }
+  case IC_Release: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrTopDownState(Arg);
+    S.DecrementNestCount();
+
+    switch (S.GetSeq()) {
+    case S_Retain:
+    case S_CanRelease:
+      S.RRI.ReverseInsertPts.clear();
+      // FALL THROUGH
+    case S_Use:
+      S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+      S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+      Releases[Inst] = S.RRI;
+      S.ClearSequenceProgress();
+      break;
+    case S_None:
        break;
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+      llvm_unreachable("top-down pointer in release state!");
      }
-    case IC_RetainBlock:
-    case IC_Retain:
-    case IC_RetainRV: {
-      Arg = GetObjCArg(Inst);
+    break;
+  }
+  case IC_AutoreleasepoolPop:
+    // Conservatively, clear MyStates for all known pointers.
+    MyStates.clearTopDownPointers();
+    return NestingDetected;
+  case IC_AutoreleasepoolPush:
+  case IC_None:
+    // These are irrelevant.
+    return NestingDetected;
+  default:
+    break;
+  }
  
-      PtrState &S = MyStates.getPtrBottomUpState(Arg);
-      S.DecrementRefCount();
-      S.SetAtLeastOneRefCount();
+  // Consider any other possible effects of this instruction on each
+  // pointer being tracked.
+  for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+       ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+    const Value *Ptr = MI->first;
+    if (Ptr == Arg)
+      continue; // Handled above.
+    PtrState &S = MI->second;
+    Sequence Seq = S.GetSeq();
+
+    // Check for possible releases.
+    if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      S.ClearRefCount();
+      switch (Seq) {
+      case S_Retain:
+        S.SetSeq(S_CanRelease);
+        assert(S.RRI.ReverseInsertPts.empty());
+        S.RRI.ReverseInsertPts.insert(Inst);
  
-      switch (S.GetSeq()) {
-      case S_Stop:
-      case S_Release:
-      case S_MovableRelease:
+        // One call can't cause a transition from S_Retain to S_CanRelease
+        // and S_CanRelease to S_Use. If we've made the first transition,
+        // we're done.
+        continue;
        case S_Use:
-        S.RRI.ReverseInsertPts.clear();
-        // FALL THROUGH
        case S_CanRelease:
-        // Don't do retain+release tracking for IC_RetainRV, because it's
-        // better to let it remain as the first instruction after a call.
-        if (Class != IC_RetainRV) {
-          S.RRI.IsRetainBlock = Class == IC_RetainBlock;
-          Retains[Inst] = S.RRI;
-        }
-        S.ClearSequenceProgress();
-        break;
        case S_None:
          break;
-      case S_Retain:
-        llvm_unreachable("bottom-up pointer in retain state!");
-      }
-      break;
-    }
-    case IC_AutoreleasepoolPop:
-      // Conservatively, clear MyStates for all known pointers.
-      MyStates.clearBottomUpPointers();
-      continue;
-    case IC_AutoreleasepoolPush:
-    case IC_None:
-      // These are irrelevant.
-      continue;
-    default:
-      break;
-    }
-
-    // Consider any other possible effects of this instruction on each
-    // pointer being tracked.
-    for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
-         ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
-      const Value *Ptr = MI->first;
-      if (Ptr == Arg)
-        continue; // Handled above.
-      PtrState &S = MI->second;
-      Sequence Seq = S.GetSeq();
-
-      // Check for possible releases. Note that we don't have to update
-      // S's RefCount because any reference count modifications would be
-      // done through a different provenance.
-      if (!IsRetain(Class) && Class != IC_RetainBlock &&
-          CanAlterRefCount(Inst, Ptr, PA, Class))
-        switch (Seq) {
-        case S_Use:
-          S.SetSeq(S_CanRelease);
-          continue;
-        case S_CanRelease:
-        case S_Release:
-        case S_MovableRelease:
-        case S_Stop:
-        case S_None:
-          break;
-        case S_Retain:
-          llvm_unreachable("bottom-up pointer in retain state!");
-        }
-
-      // Check for possible direct uses.
-      switch (Seq) {
+      case S_Stop:
        case S_Release:
        case S_MovableRelease:
-        if (CanUse(Inst, Ptr, PA, Class)) {
-          S.RRI.ReverseInsertPts.clear();
-          S.RRI.ReverseInsertPts.insert(Inst);
-          S.SetSeq(S_Use);
-        } else if (Seq == S_Release &&
-                   (Class == IC_User || Class == IC_CallOrUser)) {
-          // Non-movable releases depend on any possible objc pointer use.
-          S.SetSeq(S_Stop);
-          S.RRI.ReverseInsertPts.clear();
-          S.RRI.ReverseInsertPts.insert(Inst);
-        }
-        break;
-      case S_Stop:
-        if (CanUse(Inst, Ptr, PA, Class))
-          S.SetSeq(S_Use);
-        break;
-      case S_CanRelease:
-      case S_Use:
-      case S_None:
-        break;
-      case S_Retain:
-        llvm_unreachable("bottom-up pointer in retain state!");
+        llvm_unreachable("top-down pointer in release state!");
        }
      }
+
+    // Check for possible direct uses.
+    switch (Seq) {
+    case S_CanRelease:
+      if (CanUse(Inst, Ptr, PA, Class))
+        S.SetSeq(S_Use);
+      break;
+    case S_Retain:
+    case S_Use:
+    case S_None:
+      break;
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+      llvm_unreachable("top-down pointer in release state!");
+    }
    }
  
    return NestingDetected;
@@ -2407,164 +2932,117 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
  
    // Merge the states from each predecessor to compute the initial state
    // for the current block.
-  const_pred_iterator PI(BB), PE(BB, false);
-  if (PI == PE)
-    MyStates.SetAsEntry();
-  else
-    do {
-      const BasicBlock *Pred = *PI++;
-      if (Pred == BB)
-        continue;
-      DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+  for (BBState::edge_iterator PI(MyStates.pred_begin()),
+       PE(MyStates.pred_end()); PI != PE; ++PI) {
+    const BasicBlock *Pred = *PI;
+    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+    assert(I != BBStates.end());
+    MyStates.InitFromPred(I->second);
+    ++PI;
+    for (; PI != PE; ++PI) {
+      Pred = *PI;
+      I = BBStates.find(Pred);
        assert(I != BBStates.end());
-      // If we haven't seen this node yet, then we've found a CFG cycle.
-      // Be optimistic here; it's CheckForCFGHazards' job detect trouble.
-      if (!I->second.isVisitedTopDown())
-        continue;
-      MyStates.InitFromPred(I->second);
-      while (PI != PE) {
-        Pred = *PI++;
-        if (Pred != BB) {
-          I = BBStates.find(Pred);
-          assert(I != BBStates.end());
-          if (I->second.isVisitedTopDown())
-            MyStates.MergePred(I->second);
-        }
-      }
-      break;
-    } while (PI != PE);
+      MyStates.MergePred(I->second);
+    }
+    break;
+  }
  
    // Visit all the instructions, top-down.
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      Instruction *Inst = I;
-    InstructionClass Class = GetInstructionClass(Inst);
-    const Value *Arg = 0;
+    NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+  }
  
-    switch (Class) {
-    case IC_RetainBlock:
-    case IC_Retain:
-    case IC_RetainRV: {
-      Arg = GetObjCArg(Inst);
+  CheckForCFGHazards(BB, BBStates, MyStates);
+  return NestingDetected;
+}
  
-      PtrState &S = MyStates.getPtrTopDownState(Arg);
+static void
+ComputePostOrders(Function &F,
+                  SmallVectorImpl<BasicBlock *> &PostOrder,
+                  SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
+                  unsigned NoObjCARCExceptionsMDKind,
+                  DenseMap<const BasicBlock *, BBState> &BBStates) {
+  /// Visited - The visited set, for doing DFS walks.
+  SmallPtrSet<BasicBlock *, 16> Visited;
  
-      // Don't do retain+release tracking for IC_RetainRV, because it's
-      // better to let it remain as the first instruction after a call.
-      if (Class != IC_RetainRV) {
-        // If we see two retains in a row on the same pointer. If so, make
-        // a note, and we'll cicle back to revisit it after we've
-        // hopefully eliminated the second retain, which may allow us to
-        // eliminate the first retain too.
-        // Theoretically we could implement removal of nested retain+release
-        // pairs by making PtrState hold a stack of states, but this is
-        // simple and avoids adding overhead for the non-nested case.
-        if (S.GetSeq() == S_Retain)
-          NestingDetected = true;
-
-        S.SetSeq(S_Retain);
-        S.RRI.clear();
-        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
-        S.RRI.KnownIncremented = S.IsKnownIncremented();
-        S.RRI.Calls.insert(Inst);
+  // Do DFS, computing the PostOrder.
+  SmallPtrSet<BasicBlock *, 16> OnStack;
+  SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+
+  // Functions always have exactly one entry block, and we don't have
+  // any other block that we treat like an entry block.
+  BasicBlock *EntryBB = &F.getEntryBlock();
+  BBState &MyStates = BBStates[EntryBB];
+  MyStates.SetAsEntry();
+  TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+  SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
+  Visited.insert(EntryBB);
+  OnStack.insert(EntryBB);
+  do {
+  dfs_next_succ:
+    BasicBlock *CurrBB = SuccStack.back().first;
+    TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
+    succ_iterator SE(TI, false);
+
+    // If the terminator is an invoke marked with the
+    // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+    // ignored, for ARC purposes.
+    if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+      --SE;
+
+    while (SuccStack.back().second != SE) {
+      BasicBlock *SuccBB = *SuccStack.back().second++;
+      if (Visited.insert(SuccBB)) {
+        TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
+        SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+        BBStates[CurrBB].addSucc(SuccBB);
+        BBState &SuccStates = BBStates[SuccBB];
+        SuccStates.addPred(CurrBB);
+        OnStack.insert(SuccBB);
+        goto dfs_next_succ;
        }
  
-      S.SetAtLeastOneRefCount();
-      S.IncrementRefCount();
-      break;
+      if (!OnStack.count(SuccBB)) {
+        BBStates[CurrBB].addSucc(SuccBB);
+        BBStates[SuccBB].addPred(CurrBB);
+      }
      }
-    case IC_Release: {
-      Arg = GetObjCArg(Inst);
+    OnStack.erase(CurrBB);
+    PostOrder.push_back(CurrBB);
+    SuccStack.pop_back();
+  } while (!SuccStack.empty());
  
-      PtrState &S = MyStates.getPtrTopDownState(Arg);
-      S.DecrementRefCount();
+  Visited.clear();
  
-      switch (S.GetSeq()) {
-      case S_Retain:
-      case S_CanRelease:
-        S.RRI.ReverseInsertPts.clear();
-        // FALL THROUGH
-      case S_Use:
-        S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
-        S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
-        Releases[Inst] = S.RRI;
-        S.ClearSequenceProgress();
-        break;
-      case S_None:
-        break;
-      case S_Stop:
-      case S_Release:
-      case S_MovableRelease:
-        llvm_unreachable("top-down pointer in release state!");
-      }
-      break;
-    }
-    case IC_AutoreleasepoolPop:
-      // Conservatively, clear MyStates for all known pointers.
-      MyStates.clearTopDownPointers();
-      continue;
-    case IC_AutoreleasepoolPush:
-    case IC_None:
-      // These are irrelevant.
+  // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+  // Functions may have many exits, and there also blocks which we treat
+  // as exits due to ignored edges.
+  SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    BasicBlock *ExitBB = I;
+    BBState &MyStates = BBStates[ExitBB];
+    if (!MyStates.isExit())
        continue;
-    default:
-      break;
-    }
  
-    // Consider any other possible effects of this instruction on each
-    // pointer being tracked.
-    for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
-         ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
-      const Value *Ptr = MI->first;
-      if (Ptr == Arg)
-        continue; // Handled above.
-      PtrState &S = MI->second;
-      Sequence Seq = S.GetSeq();
-
-      // Check for possible releases. Note that we don't have to update
-      // S's RefCount because any reference count modifications would be
-      // done through a different provenance.
-      if (!IsRetain(Class) && Class != IC_RetainBlock &&
-          CanAlterRefCount(Inst, Ptr, PA, Class))
-        switch (Seq) {
-        case S_Retain:
-          S.SetSeq(S_CanRelease);
-          S.RRI.ReverseInsertPts.clear();
-          S.RRI.ReverseInsertPts.insert(Inst);
+    MyStates.SetAsExit();
  
-          // One call can't cause a transition from S_Retain to S_CanRelease
-          // and S_CanRelease to S_Use. If we've made the first transition,
-          // we're done.
-          continue;
-        case S_Use:
-        case S_CanRelease:
-        case S_None:
-          break;
-        case S_Stop:
-        case S_Release:
-        case S_MovableRelease:
-          llvm_unreachable("top-down pointer in release state!");
+    PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
+    Visited.insert(ExitBB);
+    while (!PredStack.empty()) {
+    reverse_dfs_next_succ:
+      BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+      while (PredStack.back().second != PE) {
+        BasicBlock *BB = *PredStack.back().second++;
+        if (Visited.insert(BB)) {
+          PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
+          goto reverse_dfs_next_succ;
          }
-
-      // Check for possible direct uses.
-      switch (Seq) {
-      case S_CanRelease:
-        if (CanUse(Inst, Ptr, PA, Class))
-          S.SetSeq(S_Use);
-        break;
-      case S_Use:
-      case S_Retain:
-      case S_None:
-        break;
-      case S_Stop:
-      case S_Release:
-      case S_MovableRelease:
-        llvm_unreachable("top-down pointer in release state!");
        }
+      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
      }
    }
-
-  CheckForCFGHazards(BB, BBStates, MyStates);
-  return NestingDetected;
  }
  
  // Visit - Visit the function both top-down and bottom-up.
@@ -2573,42 +3051,31 @@ ObjCARCOpt::Visit(Function &F,
                    DenseMap<const BasicBlock *, BBState> &BBStates,
                    MapVector<Value *, RRInfo> &Retains,
                    DenseMap<Value *, RRInfo> &Releases) {
-  // Use reverse-postorder on the reverse CFG for bottom-up, because we
-  // magically know that loops will be well behaved, i.e. they won't repeatedly
-  // call retain on a single pointer without doing a release. We can't use
-  // ReversePostOrderTraversal here because we want to walk up from each
-  // function exit point.
-  SmallPtrSet<BasicBlock *, 16> Visited;
-  SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
-  SmallVector<BasicBlock *, 16> Order;
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
-    BasicBlock *BB = I;
-    if (BB->getTerminator()->getNumSuccessors() == 0)
-      Stack.push_back(std::make_pair(BB, pred_begin(BB)));
-  }
-  while (!Stack.empty()) {
-    pred_iterator End = pred_end(Stack.back().first);
-    while (Stack.back().second != End) {
-      BasicBlock *BB = *Stack.back().second++;
-      if (Visited.insert(BB))
-        Stack.push_back(std::make_pair(BB, pred_begin(BB)));
-    }
-    Order.push_back(Stack.pop_back_val().first);
-  }
+
+  // Use reverse-postorder traversals, because we magically know that loops
+  // will be well behaved, i.e. they won't repeatedly call retain on a single
+  // pointer without doing a release. We can't use the ReversePostOrderTraversal
+  // class here because we want the reverse-CFG postorder to consider each
+  // function exit point, and we want to ignore selected cycle edges.
+  SmallVector<BasicBlock *, 16> PostOrder;
+  SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+                    NoObjCARCExceptionsMDKind,
+                    BBStates);
+
+  // Use reverse-postorder on the reverse CFG for bottom-up.
    bool BottomUpNestingDetected = false;
-  while (!Order.empty()) {
-    BasicBlock *BB = Order.pop_back_val();
-    BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
-  }
+  for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+       ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+       I != E; ++I)
+    BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
  
-  // Use regular reverse-postorder for top-down.
+  // Use reverse-postorder for top-down.
    bool TopDownNestingDetected = false;
-  typedef ReversePostOrderTraversal<Function *> RPOTType;
-  RPOTType RPOT(&F);
-  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
-    BasicBlock *BB = *I;
-    TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
-  }
+  for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+       PostOrder.rbegin(), E = PostOrder.rend();
+       I != E; ++I)
+    TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
  
    return TopDownNestingDetected && BottomUpNestingDetected;
  }
@@ -2636,40 +3103,26 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
                           getRetainBlockCallee(M) : getRetainCallee(M),
                         MyArg, "", InsertPt);
      Call->setDoesNotThrow();
-    if (!RetainsToMove.IsRetainBlock)
+    if (RetainsToMove.IsRetainBlock)
+      Call->setMetadata(CopyOnEscapeMDKind,
+                        MDNode::get(M->getContext(), ArrayRef<Value *>()));
+    else
        Call->setTailCall();
    }
    for (SmallPtrSet<Instruction *, 2>::const_iterator
         PI = RetainsToMove.ReverseInsertPts.begin(),
         PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
-    Instruction *LastUse = *PI;
-    Instruction *InsertPts[] = { 0, 0, 0 };
-    if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
-      // We can't insert code immediately after an invoke instruction, so
-      // insert code at the beginning of both successor blocks instead.
-      // The invoke's return value isn't available in the unwind block,
-      // but our releases will never depend on it, because they must be
-      // paired with retains from before the invoke.
-      InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
-      InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
-    } else {
-      // Insert code immediately after the last use.
-      InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
-    }
-
-    for (Instruction **I = InsertPts; *I; ++I) {
-      Instruction *InsertPt = *I;
-      Value *MyArg = ArgTy == ParamTy ? Arg :
-                     new BitCastInst(Arg, ParamTy, "", InsertPt);
-      CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
-                                        "", InsertPt);
-      // Attach a clang.imprecise_release metadata tag, if appropriate.
-      if (MDNode *M = ReleasesToMove.ReleaseMetadata)
-        Call->setMetadata(ImpreciseReleaseMDKind, M);
-      Call->setDoesNotThrow();
-      if (ReleasesToMove.IsTailCallRelease)
-        Call->setTailCall();
-    }
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+                                      "", InsertPt);
+    // Attach a clang.imprecise_release metadata tag, if appropriate.
+    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+      Call->setMetadata(ImpreciseReleaseMDKind, M);
+    Call->setDoesNotThrow();
+    if (ReleasesToMove.IsTailCallRelease)
+      Call->setTailCall();
    }
  
    // Delete the original retain and release calls.
@@ -2689,6 +3142,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
    }
  }
  
+/// PerformCodePlacement - Identify pairings between the retains and releases,
+/// and delete and/or move them.
  bool
  ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                                     &BBStates,
@@ -2702,9 +3157,10 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
    SmallVector<Instruction *, 4> NewReleases;
    SmallVector<Instruction *, 8> DeadInsts;
  
+  // Visit each retain.
    for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
-       E = Retains.end(); I != E; ) {
-    Value *V = (I++)->first;
+       E = Retains.end(); I != E; ++I) {
+    Value *V = I->first;
      if (!V) continue; // blotted
  
      Instruction *Retain = cast<Instruction>(V);
@@ -2715,9 +3171,18 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
      // regardless of what possible decrements or uses lie between them.
      bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
  
+    // A constant pointer can't be pointing to an object on the heap. It may
+    // be reference-counted, but it won't be deleted.
+    if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
+      if (const GlobalVariable *GV =
+            dyn_cast<GlobalVariable>(
+              StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+        if (GV->isConstant())
+          KnownSafe = true;
+
      // If a pair happens in a region where it is known that the reference count
      // is already incremented, we can similarly ignore possible decrements.
-    bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+    bool KnownSafeTD = true, KnownSafeBU = true;
  
      // Connect the dots between the top-down-collected RetainsToMove and
      // bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2737,7 +3202,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
          MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
          assert(It != Retains.end());
          const RRInfo &NewRetainRRI = It->second;
-        KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+        KnownSafeTD &= NewRetainRRI.KnownSafe;
          for (SmallPtrSet<Instruction *, 2>::const_iterator
               LI = NewRetainRRI.Calls.begin(),
               LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2793,7 +3258,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
            Releases.find(NewRelease);
          assert(It != Releases.end());
          const RRInfo &NewReleaseRRI = It->second;
-        KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+        KnownSafeBU &= NewReleaseRRI.KnownSafe;
          for (SmallPtrSet<Instruction *, 2>::const_iterator
               LI = NewReleaseRRI.Calls.begin(),
               LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
@@ -2841,9 +3306,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
        if (NewRetains.empty()) break;
      }
  
-    // If the pointer is known incremented, we can safely delete the pair
-    // regardless of what's between them.
-    if (KnownIncrementedTD || KnownIncrementedBU) {
+    // If the pointer is known incremented or nested, we can safely delete the
+    // pair regardless of what's between them.
+    if (KnownSafeTD || KnownSafeBU) {
        RetainsToMove.ReverseInsertPts.clear();
        ReleasesToMove.ReverseInsertPts.clear();
        NewCount = 0;
@@ -2866,6 +3331,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
  
      // Ok, everything checks out and we're all set. Let's move some code!
      Changed = true;
+    assert(OldCount != 0 && "Unreachable code?");
      AnyPairsCompletelyEliminated = NewCount == 0;
      NumRRs += OldCount - NewCount;
      MoveCalls(Arg, RetainsToMove, ReleasesToMove,
@@ -3006,7 +3472,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
      if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
        for (Value::use_iterator UI = Alloca->use_begin(),
             UE = Alloca->use_end(); UI != UE; ++UI) {
-        Instruction *UserInst = cast<Instruction>(*UI);
+        const Instruction *UserInst = cast<Instruction>(*UI);
          switch (GetBasicInstructionClass(UserInst)) {
          case IC_InitWeak:
          case IC_StoreWeak:
@@ -3020,8 +3486,18 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
        for (Value::use_iterator UI = Alloca->use_begin(),
             UE = Alloca->use_end(); UI != UE; ) {
          CallInst *UserInst = cast<CallInst>(*UI++);
-        if (!UserInst->use_empty())
-          UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+        switch (GetBasicInstructionClass(UserInst)) {
+        case IC_InitWeak:
+        case IC_StoreWeak:
+          // These functions return their second argument.
+          UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+          break;
+        case IC_DestroyWeak:
+          // No return value.
+          break;
+        default:
+          llvm_unreachable("alloca really is used!");
+        }
          UserInst->eraseFromParent();
        }
        Alloca->eraseFromParent();
@@ -3089,8 +3565,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
        if (!Autorelease)
          goto next_block;
-      InstructionClass AutoreleaseClass =
-        GetBasicInstructionClass(Autorelease);
+      InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
        if (!IsAutorelease(AutoreleaseClass))
          goto next_block;
        if (GetObjCArg(Autorelease) != Arg)
@@ -3128,7 +3603,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
  
          // Check that there is nothing that can affect the reference
          // count between the retain and the call.
-        FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+        // Note that Retain need not be in BB.
+        FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
                           DependingInstructions, Visited, PA);
          if (DependingInstructions.size() != 1)
            goto next_block;
@@ -3165,6 +3641,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
    if (!EnableARCOpts)
      return false;
  
+  // If nothing in the Module uses ARC, don't do anything.
    Run = ModuleHasARC(M);
    if (!Run)
      return false;
@@ -3172,10 +3649,14 @@ bool ObjCARCOpt::doInitialization(Module &M) {
    // Identify the imprecise release metadata kind.
    ImpreciseReleaseMDKind =
      M.getContext().getMDKindID("clang.imprecise_release");
+  CopyOnEscapeMDKind =
+    M.getContext().getMDKindID("clang.arc.copy_on_escape");
+  NoObjCARCExceptionsMDKind =
+    M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
  
    // Intuitively, objc_retain and others are nocapture, however in practice
    // they are not, because they return their argument value. And objc_release
-  // calls finalizers.
+  // calls finalizers which can have arbitrary side effects.
  
    // These are initialized lazily.
    RetainRVCallee = 0;
@@ -3227,8 +3708,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
        while (OptimizeSequences(F)) {}
  
    // Optimizations if objc_autorelease is used.
-  if (UsedInThisFunction &
-      ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+  if (UsedInThisFunction & ((1 << IC_Autorelease) |
+                            (1 << IC_AutoreleaseRV)))
      OptimizeReturns(F);
  
    return Changed;
@@ -3273,6 +3754,11 @@ namespace {
      /// RetainRV calls to make the optimization work on targets which need it.
      const MDString *RetainRVMarker;
  
+    /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
+    /// at the end of walking the function we have found no alloca
+    /// instructions, these calls can be marked "tail".
+    SmallPtrSet<CallInst *, 8> StoreStrongCalls;
+
      Constant *getStoreStrongCallee(Module *M);
      Constant *getRetainAutoreleaseCallee(Module *M);
      Constant *getRetainAutoreleaseRVCallee(Module *M);
@@ -3322,13 +3808,11 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
      LLVMContext &C = M->getContext();
      Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
      Type *I8XX = PointerType::getUnqual(I8X);
-    std::vector<Type *> Params;
-    Params.push_back(I8XX);
-    Params.push_back(I8X);
+    Type *Params[] = { I8XX, I8X };
  
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
-    Attributes.addAttr(1, Attribute::NoCapture);
+    AttrListPtr Attributes = AttrListPtr()
+      .addAttr(~0u, Attribute::NoUnwind)
+      .addAttr(1, Attribute::NoCapture);
  
      StoreStrongCallee =
        M->getOrInsertFunction(
@@ -3343,12 +3827,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
    if (!RetainAutoreleaseCallee) {
      LLVMContext &C = M->getContext();
      Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<Type *> Params;
-    Params.push_back(I8X);
-    FunctionType *FTy =
-      FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      RetainAutoreleaseCallee =
        M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
    }
@@ -3359,12 +3840,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
    if (!RetainAutoreleaseRVCallee) {
      LLVMContext &C = M->getContext();
      Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<Type *> Params;
-    Params.push_back(I8X);
-    FunctionType *FTy =
-      FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
      RetainAutoreleaseRVCallee =
        M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
                               Attributes);
@@ -3372,8 +3850,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
    return RetainAutoreleaseRVCallee;
  }
  
-/// ContractAutorelease - Merge an autorelease with a retain into a fused
-/// call.
+/// ContractAutorelease - Merge an autorelease with a retain into a fused call.
  bool
  ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
                                       InstructionClass Class,
@@ -3428,24 +3905,47 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
  void ObjCARCContract::ContractRelease(Instruction *Release,
                                        inst_iterator &Iter) {
    LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
-  if (!Load || Load->isVolatile()) return;
+  if (!Load || !Load->isSimple()) return;
  
    // For now, require everything to be in one basic block.
    BasicBlock *BB = Release->getParent();
    if (Load->getParent() != BB) return;
  
-  // Walk down to find the store.
+  // Walk down to find the store and the release, which may be in either order.
    BasicBlock::iterator I = Load, End = BB->end();
    ++I;
    AliasAnalysis::Location Loc = AA->getLocation(Load);
-  while (I != End &&
-         (&*I == Release ||
-          IsRetain(GetBasicInstructionClass(I)) ||
-          !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
-    ++I;
-  StoreInst *Store = dyn_cast<StoreInst>(I);
-  if (!Store || Store->isVolatile()) return;
-  if (Store->getPointerOperand() != Loc.Ptr) return;
+  StoreInst *Store = 0;
+  bool SawRelease = false;
+  for (; !Store || !SawRelease; ++I) {
+    if (I == End)
+      return;
+
+    Instruction *Inst = I;
+    if (Inst == Release) {
+      SawRelease = true;
+      continue;
+    }
+
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    // Unrelated retains are harmless.
+    if (IsRetain(Class))
+      continue;
+
+    if (Store) {
+      // The store is the point where we're going to put the objc_storeStrong,
+      // so make sure there are no uses after it.
+      if (CanUse(Inst, Load, PA, Class))
+        return;
+    } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
+      // We are moving the load down to the store, so check for anything
+      // else which writes to the memory between the load and the store.
+      Store = dyn_cast<StoreInst>(Inst);
+      if (!Store || !Store->isSimple()) return;
+      if (Store->getPointerOperand() != Loc.Ptr) return;
+    }
+  }
  
    Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
  
@@ -3476,6 +3976,11 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
    StoreStrong->setDoesNotThrow();
    StoreStrong->setDebugLoc(Store->getDebugLoc());
  
+  // We can't set the tail flag yet, because we haven't yet determined
+  // whether there are any escaping allocas. Remember this call, so that
+  // we can set the tail flag once we know it's safe.
+  StoreStrongCalls.insert(StoreStrong);
+
    if (&*Iter == Store) ++Iter;
    Store->eraseFromParent();
    Release->eraseFromParent();
@@ -3485,6 +3990,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
  }
  
  bool ObjCARCContract::doInitialization(Module &M) {
+  // If nothing in the Module uses ARC, don't do anything.
    Run = ModuleHasARC(M);
    if (!Run)
      return false;
@@ -3522,6 +4028,14 @@ bool ObjCARCContract::runOnFunction(Function &F) {
  
    PA.setAA(&getAnalysis<AliasAnalysis>());
  
+  // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+  // keyword. Be conservative if the function has variadic arguments.
+  // It seems that functions which "return twice" are also unsafe for the
+  // "tail" argument, because they are setjmp, which could need to
+  // return to an earlier stack state.
+  bool TailOkForStoreStrongs = !F.isVarArg() &&
+                               !F.callsFunctionThatReturnsTwice();
+
    // For ObjC library calls which return their argument, replace uses of the
    // argument with uses of the call return value, if it dominates the use. This
    // reduces register pressure.
@@ -3550,9 +4064,24 @@ bool ObjCARCContract::runOnFunction(Function &F) {
        if (!RetainRVMarker)
          break;
        BasicBlock::iterator BBI = Inst;
-      --BBI;
-      while (isNoopInstruction(BBI)) --BBI;
+      BasicBlock *InstParent = Inst->getParent();
+
+      // Step up to see if the call immediately precedes the RetainRV call.
+      // If it's an invoke, we have to cross a block boundary. And we have
+      // to carefully dodge no-op instructions.
+      do {
+        if (&*BBI == InstParent->begin()) {
+          BasicBlock *Pred = InstParent->getSinglePredecessor();
+          if (!Pred)
+            goto decline_rv_optimization;
+          BBI = Pred->getTerminator();
+          break;
+        }
+        --BBI;
+      } while (isNoopInstruction(BBI));
+
        if (&*BBI == GetObjCArg(Inst)) {
+        Changed = true;
          InlineAsm *IA =
            InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
                                             /*isVarArg=*/false),
@@ -3560,6 +4089,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
                           /*Constraints=*/"", /*hasSideEffects=*/true);
          CallInst::Create(IA, "", Inst);
        }
+    decline_rv_optimization:
        break;
      }
      case IC_InitWeak: {
@@ -3578,6 +4108,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
      case IC_Release:
        ContractRelease(Inst, I);
        continue;
+    case IC_User:
+      // Be conservative if the function has any alloca instructions.
+      // Technically we only care about escaping alloca instructions,
+      // but this is sufficient to handle some interesting cases.
+      if (isa<AllocaInst>(Inst))
+        TailOkForStoreStrongs = false;
+      continue;
      default:
        continue;
      }
@@ -3595,40 +4132,46 @@ bool ObjCARCContract::runOnFunction(Function &F) {
          Use &U = UI.getUse();
          unsigned OperandNo = UI.getOperandNo();
          ++UI; // Increment UI now, because we may unlink its element.
-        if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
-          if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
-            Changed = true;
-            Instruction *Replacement = Inst;
-            Type *UseTy = U.get()->getType();
-            if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
-              // For PHI nodes, insert the bitcast in the predecessor block.
-              unsigned ValNo =
-                PHINode::getIncomingValueNumForOperand(OperandNo);
-              BasicBlock *BB =
-                PHI->getIncomingBlock(ValNo);
-              if (Replacement->getType() != UseTy)
-                Replacement = new BitCastInst(Replacement, UseTy, "",
-                                              &BB->back());
-              for (unsigned i = 0, e = PHI->getNumIncomingValues();
-                   i != e; ++i)
-                if (PHI->getIncomingBlock(i) == BB) {
-                  // Keep the UI iterator valid.
-                  if (&PHI->getOperandUse(
-                        PHINode::getOperandNumForIncomingValue(i)) ==
-                        &UI.getUse())
-                    ++UI;
-                  PHI->setIncomingValue(i, Replacement);
-                }
-            } else {
-              if (Replacement->getType() != UseTy)
-                Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
-              U.set(Replacement);
-            }
+
+        // If the call's return value dominates a use of the call's argument
+        // value, rewrite the use to use the return value. We check for
+        // reachability here because an unreachable call is considered to
+        // trivially dominate itself, which would lead us to rewriting its
+        // argument in terms of its return value, which would lead to
+        // infinite loops in GetObjCArg.
+        if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
+          Changed = true;
+          Instruction *Replacement = Inst;
+          Type *UseTy = U.get()->getType();
+          if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+            // For PHI nodes, insert the bitcast in the predecessor block.
+            unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+            BasicBlock *BB = PHI->getIncomingBlock(ValNo);
+            if (Replacement->getType() != UseTy)
+              Replacement = new BitCastInst(Replacement, UseTy, "",
+                                            &BB->back());
+            // While we're here, rewrite all edges for this PHI, rather
+            // than just one use at a time, to minimize the number of
+            // bitcasts we emit.
+            for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+              if (PHI->getIncomingBlock(i) == BB) {
+                // Keep the UI iterator valid.
+                if (&PHI->getOperandUse(
+                      PHINode::getOperandNumForIncomingValue(i)) ==
+                    &UI.getUse())
+                  ++UI;
+                PHI->setIncomingValue(i, Replacement);
+              }
+          } else {
+            if (Replacement->getType() != UseTy)
+              Replacement = new BitCastInst(Replacement, UseTy, "",
+                                            cast<Instruction>(U.getUser()));
+            U.set(Replacement);
            }
+        }
        }
  
-      // If Arg is a no-op casted pointer, strip one level of casts and
-      // iterate.
+      // If Arg is a no-op casted pointer, strip one level of casts and iterate.
        if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
          Arg = BI->getOperand(0);
        else if (isa<GEPOperator>(Arg) &&
@@ -3642,5 +4185,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
      }
    }
  
+  // If this function has no escaping allocas or suspicious vararg usage,
+  // objc_storeStrong calls can be marked with the "tail" keyword.
+  if (TailOkForStoreStrongs)
+    for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
+         E = StoreStrongCalls.end(); I != E; ++I)
+      (*I)->setTailCall();
+  StoreStrongCalls.clear();
+
    return Changed;
  }