diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 9654b1ecd30..64f71d380a8 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -20,7 +20,7 @@
 // This file also defines a simple ARC-aware AliasAnalysis.
 //
 // WARNING: This file knows about certain library functions. It recognizes them
-// by name, and hardwires knowedge of their semantics.
+// by name, and hardwires knowledge of their semantics.
 //
 // WARNING: This file knows about how certain Objective-C library functions are
 // used. Naive LLVM IR transformations which would otherwise be
@@ -29,18 +29,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "objc-arc"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 // A handy option to enable/disable all optimizations in this file.
@@ -88,13 +78,14 @@ namespace {
 }
 #endif
 
-    ValueT &operator[](KeyT Arg) {
+    ValueT &operator[](const KeyT &Arg) {
       std::pair<typename MapTy::iterator, bool> Pair =
        Map.insert(std::make_pair(Arg, size_t(0)));
       if (Pair.second) {
-        Pair.first->second = Vector.size();
+        size_t Num = Vector.size();
+        Pair.first->second = Num;
         Vector.push_back(std::make_pair(Arg, ValueT()));
-        return Vector.back().second;
+        return Vector[Num].second;
       }
       return Vector[Pair.first->second].second;
     }
@@ -104,14 +95,15 @@
       std::pair<typename MapTy::iterator, bool> Pair =
        Map.insert(std::make_pair(InsertPair.first, size_t(0)));
       if (Pair.second) {
-        Pair.first->second = Vector.size();
+        size_t Num = Vector.size();
+        Pair.first->second = Num;
         Vector.push_back(InsertPair);
-        return std::make_pair(llvm::prior(Vector.end()), true);
+        return std::make_pair(Vector.begin() + Num, true);
       }
       return std::make_pair(Vector.begin() + Pair.first->second, false);
     }
 
-    const_iterator find(KeyT Key) const {
+    const_iterator find(const KeyT &Key) const {
       typename MapTy::const_iterator It = Map.find(Key);
       if (It == Map.end()) return Vector.end();
       return Vector.begin() + It->second;
@@ -121,7 +113,7 @@
     /// from the vector, it just zeros out the key in the vector. This leaves
     /// iterators intact, but clients must be prepared for zeroed-out keys when
     /// iterating.
-    void blot(KeyT Key) {
+    void blot(const KeyT &Key) {
       typename MapTy::iterator It = Map.find(Key);
       if (It == Map.end()) return;
       Vector[It->second].first = KeyT();
@@ -139,6 +131,13 @@
 // ARC Utilities.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/StringSwitch.h"
+
 namespace {
   /// InstructionClass - A simple classification for instructions.
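  /// The classes range from precisely recognized ObjC runtime entry points
  /// (objc_retain, objc_release, and friends) down to conservative buckets
  /// for unknown calls and for ordinary instructions that merely use pointers.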
  enum InstructionClass {
@@ -160,6 +159,7 @@ namespace {
    IC_MoveWeak,              ///< objc_moveWeak (derived)
    IC_CopyWeak,              ///< objc_copyWeak (derived)
    IC_DestroyWeak,           ///< objc_destroyWeak (derived)
+   IC_StoreStrong,           ///< objc_storeStrong (derived)
    IC_CallOrUser,            ///< could call objc_release and/or "use" pointers
    IC_Call,                  ///< could call objc_release
    IC_User,                  ///< could "use" a pointer
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
        Arg->hasNestAttr() ||
        Arg->hasStructRetAttr())
      return false;
-  // Only consider values with pointer types, and not function pointers.
+  // Only consider values with pointer types.
+  // It seems intuitive to exclude function pointer types as well, since
+  // functions are never reference-counted; however, clang occasionally
+  // bitcasts reference-counted pointers to function-pointer type
+  // temporarily.
  PointerType *Ty = dyn_cast<PointerType>(Op->getType());
-  if (!Ty || isa<FunctionType>(Ty->getElementType()))
+  if (!Ty)
    return false;
  // Conservatively assume anything else is a potential use.
  return true;
@@ -256,6 +260,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
          return StringSwitch<InstructionClass>(F->getName())
            .Case("objc_storeWeak", IC_StoreWeak)
            .Case("objc_initWeak", IC_InitWeak)
+           .Case("objc_storeStrong", IC_StoreStrong)
            .Default(IC_CallOrUser);
        // Second argument is i8**.
        if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
@@ -291,22 +296,23 @@ static InstructionClass GetInstructionClass(const Value *V) {
      // None of the intrinsic functions do objc_release. For intrinsics, the
      // only question is whether or not they may be users.
      switch (F->getIntrinsicID()) {
-      case 0: break;
-      case Intrinsic::bswap: case Intrinsic::ctpop:
-      case Intrinsic::ctlz: case Intrinsic::cttz:
      case Intrinsic::returnaddress: case Intrinsic::frameaddress:
      case Intrinsic::stacksave: case Intrinsic::stackrestore:
      case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+      case Intrinsic::objectsize: case Intrinsic::prefetch:
+      case Intrinsic::stackprotector:
+      case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
+      case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
+      case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
+      case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
+      case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+      case Intrinsic::invariant_start: case Intrinsic::invariant_end:
      // Don't let dbg info affect our results.
      case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
        // Short cut: Some intrinsics obviously don't use ObjC pointers.
        return IC_None;
      default:
-        for (Function::const_arg_iterator AI = F->arg_begin(),
-             AE = F->arg_end(); AI != AE; ++AI)
-          if (IsPotentialUse(AI))
-            return IC_User;
-        return IC_None;
+        break;
      }
    }
    return GetCallSiteClass(CI);
@@ -344,6 +350,10 @@ static InstructionClass GetInstructionClass(const Value *V) {
      break;
    default:
      // For anything else, check all the operands.
+      // Note that this includes both operands of a Store: while the first
+      // operand isn't actually being dereferenced, it is being stored to
+      // memory where we can no longer track who might read it and dereference
+      // it, so we have to consider it potentially used.
      for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
           OI != OE; ++OI)
        if (IsPotentialUse(*OI))
@@ -367,17 +377,17 @@ static InstructionClass GetBasicInstructionClass(const Value *V) {
  }

  // Otherwise, be conservative.
-  return IC_User;
+  return isa<InvokeInst>(V) ?
IC_CallOrUser : IC_User; } -/// IsRetain - Test if the the given class is objc_retain or +/// IsRetain - Test if the given class is objc_retain or /// equivalent. static bool IsRetain(InstructionClass Class) { return Class == IC_Retain || Class == IC_RetainRV; } -/// IsAutorelease - Test if the the given class is objc_autorelease or +/// IsAutorelease - Test if the given class is objc_autorelease or /// equivalent. static bool IsAutorelease(InstructionClass Class) { return Class == IC_Autorelease || @@ -421,9 +431,10 @@ static bool IsAlwaysTail(InstructionClass Class) { /// IsNoThrow - Test if the given class represents instructions which are always /// safe to mark with the nounwind attribute.. static bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. return Class == IC_Retain || Class == IC_RetainRV || - Class == IC_RetainBlock || Class == IC_Release || Class == IC_Autorelease || Class == IC_AutoreleaseRV || @@ -431,7 +442,7 @@ static bool IsNoThrow(InstructionClass Class) { Class == IC_AutoreleasepoolPop; } -/// EraseInstruction - Erase the given instruction. ObjC calls return their +/// EraseInstruction - Erase the given instruction. Many ObjC calls return their /// argument verbatim, so if it's such a call and the return value has users, /// replace them with the argument value. static void EraseInstruction(Instruction *CI) { @@ -552,9 +563,8 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return Arg; } - // If we found an identifiable object but it has multiple uses, but they - // are trivial uses, we can still consider this to be a single-use - // value. + // If we found an identifiable object but it has multiple uses, but they are + // trivial uses, we can still consider this to be a single-use value. if (IsObjCIdentifiedObject(Arg)) { for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE; ++UI) { @@ -592,6 +602,59 @@ static bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_unretainedPointer"); } +/// DoesObjCBlockEscape - Test whether the given pointer, which is an +/// Objective C block pointer, does not "escape". This differs from regular +/// escape analysis in that a use as an argument to a call is not considered +/// an escape. +static bool DoesObjCBlockEscape(const Value *BlockPtr) { + // Walk the def-use chains. + SmallVector Worklist; + Worklist.push_back(BlockPtr); + do { + const Value *V = Worklist.pop_back_val(); + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const User *UUser = *UI; + // Special - Use by a call (callee or argument) is not considered + // to be an escape. + switch (GetBasicInstructionClass(UUser)) { + case IC_StoreWeak: + case IC_InitWeak: + case IC_StoreStrong: + case IC_Autorelease: + case IC_AutoreleaseRV: + // These special functions make copies of their pointer arguments. + return true; + case IC_User: + case IC_None: + // Use by an instruction which copies the value is an escape if the + // result is an escape. + if (isa(UUser) || isa(UUser) || + isa(UUser) || isa(UUser)) { + Worklist.push_back(UUser); + continue; + } + // Use by a load is not an escape. + if (isa(UUser)) + continue; + // Use by a store is not an escape if the use is the address. + if (const StoreInst *SI = dyn_cast(UUser)) + if (V != SI->getValueOperand()) + continue; + break; + default: + // Regular calls and other stuff are not considered escapes. 
+ continue; + } + // Otherwise, conservatively assume an escape. + return true; + } + } while (!Worklist.empty()); + + // No escapes found. + return false; +} + //===----------------------------------------------------------------------===// // ARC AliasAnalysis. //===----------------------------------------------------------------------===// @@ -626,7 +689,7 @@ namespace { /// specified pass info. virtual void *getAdjustedAnalysisPointer(const void *PI) { if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; + return static_cast(this); return this; } @@ -742,7 +805,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { switch (GetBasicInstructionClass(CS.getInstruction())) { case IC_Retain: case IC_RetainRV: - case IC_RetainBlock: case IC_Autorelease: case IC_AutoreleaseRV: case IC_NoopCast: @@ -750,6 +812,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, because it updates + // pointers when it copies block data. return NoModRef; default: break; @@ -831,7 +895,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // These calls return their argument verbatim, as a low-level // optimization. However, this makes high-level optimizations // harder. Undo any uses of this optimization that the front-end - // emitted here. We'll redo them in a later pass. + // emitted here. We'll redo them in the contract pass. Changed = true; Inst->replaceAllUsesWith(cast(Inst)->getArgOperand(0)); break; @@ -843,6 +907,149 @@ bool ObjCARCExpand::runOnFunction(Function &F) { return Changed; } +//===----------------------------------------------------------------------===// +// ARC autorelease pool elimination. +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/ADT/STLExtras.h" + +namespace { + /// ObjCARCAPElim - Autorelease pool elimination. + class ObjCARCAPElim : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + + static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); + static bool OptimizeBB(BasicBlock *BB); + + public: + static char ID; + ObjCARCAPElim() : ModulePass(ID) { + initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCAPElim::ID = 0; +INITIALIZE_PASS(ObjCARCAPElim, + "objc-arc-apelim", + "ObjC ARC autorelease pool elimination", + false, false) + +Pass *llvm::createObjCARCAPElimPass() { + return new ObjCARCAPElim(); +} + +void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +/// MayAutorelease - Interprocedurally determine if calls made by the +/// given call site can possibly produce autoreleases. +bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { + if (const Function *Callee = CS.getCalledFunction()) { + if (Callee->isDeclaration() || Callee->mayBeOverridden()) + return true; + for (Function::const_iterator I = Callee->begin(), E = Callee->end(); + I != E; ++I) { + const BasicBlock *BB = I; + for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); + J != F; ++J) + if (ImmutableCallSite JCS = ImmutableCallSite(J)) + // This recursion depth limit is arbitrary. It's just great + // enough to cover known interesting testcases. 
+ if (Depth < 3 && + !JCS.onlyReadsMemory() && + MayAutorelease(JCS, Depth + 1)) + return true; + } + return false; + } + + return true; +} + +bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { + bool Changed = false; + + Instruction *Push = 0; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPush: + Push = Inst; + break; + case IC_AutoreleasepoolPop: + // If this pop matches a push and nothing in between can autorelease, + // zap the pair. + if (Push && cast(Inst)->getArgOperand(0) == Push) { + Changed = true; + Inst->eraseFromParent(); + Push->eraseFromParent(); + } + Push = 0; + break; + case IC_CallOrUser: + if (MayAutorelease(ImmutableCallSite(Inst))) + Push = 0; + break; + default: + break; + } + } + + return Changed; +} + +bool ObjCARCAPElim::runOnModule(Module &M) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!ModuleHasARC(M)) + return false; + + // Find the llvm.global_ctors variable, as the first step in + // identifying the global constructors. In theory, unnecessary autorelease + // pools could occur anywhere, but in practice it's pretty rare. Global + // ctors are a place where autorelease pools get inserted automatically, + // so it's pretty common for them to be unnecessary, and it's pretty + // profitable to eliminate them. + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return false; + + assert(GV->hasDefinitiveInitializer() && + "llvm.global_ctors is uncooperative!"); + + bool Changed = false; + + // Dig the constructor functions out of GV's initializer. + ConstantArray *Init = cast(GV->getInitializer()); + for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); + OI != OE; ++OI) { + Value *Op = *OI; + // llvm.global_ctors is an array of pairs where the second members + // are constructor functions. + Function *F = dyn_cast(cast(Op)->getOperand(1)); + // If the user used a constructor function with the wrong signature and + // it got bitcasted or whatever, look the other way. + if (!F) + continue; + // Only look at function definitions. + if (F->isDeclaration()) + continue; + // Only look at functions with one basic block. + if (llvm::next(F->begin()) != F->end()) + continue; + // Ok, a single-block constructor function definition. Try to optimize it. + Changed |= OptimizeBB(F->begin()); + } + + return Changed; +} + //===----------------------------------------------------------------------===// // ARC optimization. //===----------------------------------------------------------------------===// @@ -885,13 +1092,10 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // TODO: Delete release+retain pairs (rare). -#include "llvm/GlobalAlias.h" -#include "llvm/Constants.h" #include "llvm/LLVMContext.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallPtrSet.h" STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); @@ -939,22 +1143,13 @@ bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { // If the values are Selects with the same condition, we can do a more precise // check: just check for relations between the values on corresponding arms. 
if (const SelectInst *SB = dyn_cast(B)) - if (A->getCondition() == SB->getCondition()) { - if (related(A->getTrueValue(), SB->getTrueValue())) - return true; - if (related(A->getFalseValue(), SB->getFalseValue())) - return true; - return false; - } + if (A->getCondition() == SB->getCondition()) + return related(A->getTrueValue(), SB->getTrueValue()) || + related(A->getFalseValue(), SB->getFalseValue()); // Check both arms of the Select node individually. - if (related(A->getTrueValue(), B)) - return true; - if (related(A->getFalseValue(), B)) - return true; - - // The arms both checked out. - return false; + return related(A->getTrueValue(), B) || + related(A->getFalseValue(), B); } bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { @@ -1165,7 +1360,8 @@ namespace { SmallPtrSet ReverseInsertPts; RRInfo() : - KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false), + KnownSafe(false), IsRetainBlock(false), + IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); @@ -1185,36 +1381,39 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propogated through the flow graph. class PtrState { - /// RefCount - The known minimum number of reference count increments. - unsigned RefCount; - /// NestCount - The known minimum level of retain+release nesting. unsigned NestCount; + /// KnownPositiveRefCount - True if the reference count is known to + /// be incremented. + bool KnownPositiveRefCount; + + /// Partial - True of we've seen an opportunity for partial RR elimination, + /// such as pushing calls into a CFG triangle or into one side of a + /// CFG diamond. + bool Partial; + /// Seq - The current position in the sequence. - Sequence Seq; + Sequence Seq : 8; public: /// RRI - Unidirectional information about the current sequence. /// TODO: Encapsulate this better. RRInfo RRI; - PtrState() : RefCount(0), NestCount(0), Seq(S_None) {} - - void SetAtLeastOneRefCount() { - if (RefCount == 0) RefCount = 1; - } + PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false), + Seq(S_None) {} - void IncrementRefCount() { - if (RefCount != UINT_MAX) ++RefCount; + void SetKnownPositiveRefCount() { + KnownPositiveRefCount = true; } - void DecrementRefCount() { - if (RefCount != 0) --RefCount; + void ClearRefCount() { + KnownPositiveRefCount = false; } bool IsKnownIncremented() const { - return RefCount > 0; + return KnownPositiveRefCount; } void IncrementNestCount() { @@ -1233,22 +1432,17 @@ namespace { Seq = NewSeq; } - void SetSeqToRelease(MDNode *M) { - if (Seq == S_None || Seq == S_Use) { - Seq = M ? S_MovableRelease : S_Release; - RRI.ReleaseMetadata = M; - } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) { - Seq = S_Release; - RRI.ReleaseMetadata = 0; - } - } - Sequence GetSeq() const { return Seq; } void ClearSequenceProgress() { - Seq = S_None; + ResetSequenceProgress(S_None); + } + + void ResetSequenceProgress(Sequence NewSeq) { + Seq = NewSeq; + Partial = false; RRI.clear(); } @@ -1259,25 +1453,40 @@ namespace { void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); - RefCount = std::min(RefCount, Other.RefCount); + KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) Seq = S_None; + // If we're not in a sequence (anymore), drop all associated state. 
if (Seq == S_None) { + Partial = false; RRI.clear(); + } else if (Partial || Other.Partial) { + // If we're doing a merge on a path that's previously seen a partial + // merge, conservatively drop the sequence, to avoid doing partial + // RR elimination. If the branch predicates for the two merge differ, + // mixing them is unsafe. + ClearSequenceProgress(); } else { // Conservatively merge the ReleaseMetadata information. if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) RRI.ReleaseMetadata = 0; RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; - RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; + RRI.IsTailCallRelease = RRI.IsTailCallRelease && + Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); - RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(), - Other.RRI.ReverseInsertPts.end()); + + // Merge the insert point sets. If there are any differences, + // that makes this a partial merge. + Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); + for (SmallPtrSet::const_iterator + I = Other.RRI.ReverseInsertPts.begin(), + E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) + Partial |= RRI.ReverseInsertPts.insert(*I); } } @@ -1303,6 +1512,11 @@ namespace { /// known about a pointer at the top of each block. MapTy PerPtrBottomUp; + /// Preds, Succs - Effective successors and predecessors of the current + /// block (this ignores ignorable edges and ignored backedges). + SmallVector Preds; + SmallVector Succs; + public: BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} @@ -1360,14 +1574,22 @@ namespace { /// entry to an exit which pass through this block. This is only valid /// after both the top-down and bottom-up traversals are complete. unsigned GetAllPathCount() const { + assert(TopDownPathCount != 0); + assert(BottomUpPathCount != 0); return TopDownPathCount * BottomUpPathCount; } - /// IsVisitedTopDown - Test whether the block for this BBState has been - /// visited by the top-down portion of the algorithm. - bool isVisitedTopDown() const { - return TopDownPathCount != 0; - } + // Specialized CFG utilities. + typedef SmallVectorImpl::const_iterator edge_iterator; + edge_iterator pred_begin() { return Preds.begin(); } + edge_iterator pred_end() { return Preds.end(); } + edge_iterator succ_begin() { return Succs.begin(); } + edge_iterator succ_end() { return Succs.end(); } + + void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } + void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } + + bool isExit() const { return Succs.empty(); } }; } @@ -1454,6 +1676,14 @@ namespace { /// metadata. unsigned ImpreciseReleaseMDKind; + /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape + /// metadata. + unsigned CopyOnEscapeMDKind; + + /// NoObjCARCExceptionsMDKind - The Metadata Kind for + /// clang.arc.no_objc_arc_exceptions metadata. 
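+    /// Invokes bearing this metadata may have their unwind edges ignored
+    /// by the optimizer's CFG walks.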
+ unsigned NoObjCARCExceptionsMDKind; + Constant *getRetainRVCallee(Module *M); Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); @@ -1461,6 +1691,8 @@ namespace { Constant *getRetainBlockCallee(Module *M); Constant *getAutoreleaseCallee(Module *M); + bool IsRetainBlockOptimizable(const Instruction *Inst); + void OptimizeRetainCall(Function &F, Instruction *Retain); bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV); @@ -1469,9 +1701,16 @@ namespace { void CheckForCFGHazards(const BasicBlock *BB, DenseMap &BBStates, BBState &MyStates) const; + bool VisitInstructionBottomUp(Instruction *Inst, + BasicBlock *BB, + MapVector &Retains, + BBState &MyStates); bool VisitBottomUp(BasicBlock *BB, DenseMap &BBStates, MapVector &Retains); + bool VisitInstructionTopDown(Instruction *Inst, + DenseMap &Releases, + BBState &MyStates); bool VisitTopDown(BasicBlock *BB, DenseMap &BBStates, DenseMap &Releases); @@ -1528,16 +1767,29 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); } +bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { + // Without the magic metadata tag, we have to assume this might be an + // objc_retainBlock call inserted to convert a block pointer to an id, + // in which case it really is needed. + if (!Inst->getMetadata(CopyOnEscapeMDKind)) + return false; + + // If the pointer "escapes" (not including being used in a call), + // the copy may be needed. + if (DoesObjCBlockEscape(Inst)) + return false; + + // Otherwise, it's not needed. + return true; +} + Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { if (!RetainRVCallee) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector Params; - Params.push_back(I8X); - FunctionType *FTy = - FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); RetainRVCallee = M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, Attributes); @@ -1549,12 +1801,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { if (!AutoreleaseRVCallee) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector Params; - Params.push_back(I8X); - FunctionType *FTy = - FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); AutoreleaseRVCallee = M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, Attributes); @@ -1565,10 +1814,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Constant *ObjCARCOpt::getReleaseCallee(Module *M) { if (!ReleaseCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); ReleaseCallee = M->getOrInsertFunction( "objc_release", @@ -1581,10 +1828,8 @@ Constant 
*ObjCARCOpt::getReleaseCallee(Module *M) { Constant *ObjCARCOpt::getRetainCallee(Module *M) { if (!RetainCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); RetainCallee = M->getOrInsertFunction( "objc_retain", @@ -1597,15 +1842,14 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { if (!RetainBlockCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. RetainBlockCallee = M->getOrInsertFunction( "objc_retainBlock", FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attributes); + AttrListPtr()); } return RetainBlockCallee; } @@ -1613,10 +1857,8 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); AutoreleaseCallee = M->getOrInsertFunction( "objc_autorelease", @@ -1730,6 +1972,7 @@ namespace { /// use here. enum DependenceKind { NeedsPositiveRetainCount, + AutoreleasePoolBoundary, CanChangeRetainCount, RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. @@ -1759,6 +2002,19 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, } } + case AutoreleasePoolBoundary: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // These mark the end and begin of an autorelease pool scope. + return true; + default: + // Nothing else does this. + return false; + } + } + case CanChangeRetainCount: { InstructionClass Class = GetInstructionClass(Inst); switch (Class) { @@ -1776,6 +2032,7 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, case RetainAutoreleaseDep: switch (GetBasicInstructionClass(Inst)) { case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: // Don't merge an objc_autorelease with an objc_retain inside a different // autoreleasepool scope. return true; @@ -1787,7 +2044,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, // Nothing else matters for objc_retainAutorelease formation. return false; } - break; case RetainAutoreleaseRVDep: { InstructionClass Class = GetBasicInstructionClass(Inst); @@ -1801,7 +2057,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, // retainAutoreleaseReturnValue formation. 
return CanInterruptRV(Class); } - break; } case RetainRVDep: @@ -1809,7 +2064,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, } llvm_unreachable("Invalid dependence flavor"); - return true; } /// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and @@ -1889,13 +2143,13 @@ static bool isNoopInstruction(const Instruction *I) { /// objc_retainAutoreleasedReturnValue if the operand is a return value. void ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - CallSite CS(GetObjCArg(Retain)); - Instruction *Call = CS.getInstruction(); + ImmutableCallSite CS(GetObjCArg(Retain)); + const Instruction *Call = CS.getInstruction(); if (!Call) return; if (Call->getParent() != Retain->getParent()) return; // Check that the call is next to the retain. - BasicBlock::iterator I = Call; + BasicBlock::const_iterator I = Call; ++I; while (isNoopInstruction(I)) ++I; if (&*I != Retain) @@ -1908,22 +2162,30 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { } /// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into -/// objc_retain if the operand is not a return value. Or, if it can be -/// paired with an objc_autoreleaseReturnValue, delete the pair and -/// return true. +/// objc_retain if the operand is not a return value. Or, if it can be paired +/// with an objc_autoreleaseReturnValue, delete the pair and return true. bool ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { - // Check for the argument being from an immediately preceding call. - Value *Arg = GetObjCArg(RetainRV); - CallSite CS(Arg); - if (Instruction *Call = CS.getInstruction()) + // Check for the argument being from an immediately preceding call or invoke. + const Value *Arg = GetObjCArg(RetainRV); + ImmutableCallSite CS(Arg); + if (const Instruction *Call = CS.getInstruction()) { if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::iterator I = Call; + BasicBlock::const_iterator I = Call; ++I; while (isNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; + } else if (const InvokeInst *II = dyn_cast(Call)) { + BasicBlock *RetainRVParent = RetainRV->getParent(); + if (II->getNormalDest() == RetainRVParent) { + BasicBlock::const_iterator I = RetainRVParent->begin(); + while (isNoopInstruction(I)) ++I; + if (&*I == RetainRV) + return false; + } } + } // Check for being preceded by an objc_autoreleaseReturnValue on the same // pointer. In this case, we can delete the pair. @@ -2009,6 +2271,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_DestroyWeak: { CallInst *CI = cast(Inst); if (isNullOrUndef(CI->getArgOperand(0))) { + Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast(Ty)->getElementType()), Constant::getNullValue(Ty), @@ -2024,6 +2287,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *CI = cast(Inst); if (isNullOrUndef(CI->getArgOperand(0)) || isNullOrUndef(CI->getArgOperand(1))) { + Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast(Ty)->getElementType()), Constant::getNullValue(Ty), @@ -2137,9 +2401,35 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Check that there is nothing that cares about the reference // count between the call and the phi. - FindDependencies(NeedsPositiveRetainCount, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); + switch (Class) { + case IC_Retain: + case IC_RetainBlock: + // These can always be moved up. 
+ break; + case IC_Release: + // These can't be moved across things that care about the retain + // count. + FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_Autorelease: + // These can't be moved across autorelease pool scope boundaries. + FindDependencies(AutoreleasePoolBoundary, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_RetainRV: + case IC_AutoreleaseRV: + // Don't move these; the RV optimization depends on the autoreleaseRV + // being tail called, and the retainRV being immediately after a call + // (which might still happen if we get lucky with codegen layout, but + // it's not worth taking the chance). + continue; + default: + llvm_unreachable("Invalid dependence flavor"); + } + if (DependingInstructions.size() == 1 && *DependingInstructions.begin() == PN) { Changed = true; @@ -2179,7 +2469,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, BBState &MyStates) const { // If any top-down local-use or possible-dec has a succ which is earlier in // the sequence, forget it. - for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(), + for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); I != E; ++I) switch (I->second.GetSeq()) { default: break; @@ -2188,14 +2478,33 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, const TerminatorInst *TI = cast(&BB->back()); bool SomeSuccHasSame = false; bool AllSuccsHaveSame = true; - PtrState &S = MyStates.getPtrTopDownState(Arg); - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { - PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); - switch (SuccS.GetSeq()) { + PtrState &S = I->second; + succ_const_iterator SI(TI), SE(TI, false); + + // If the terminator is an invoke marked with the + // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be + // ignored, for ARC purposes. + if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + --SE; + + for (; SI != SE; ++SI) { + Sequence SuccSSeq = S_None; + bool SuccSRRIKnownSafe = false; + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + DenseMap::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + SuccSSeq = SuccS.GetSeq(); + SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + switch (SuccSSeq) { case S_None: case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { S.ClearSequenceProgress(); + break; + } continue; } case S_Use: @@ -2204,7 +2513,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, case S_Stop: case S_Release: case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) AllSuccsHaveSame = false; break; case S_Retain: @@ -2216,19 +2525,39 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. 
if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } case S_CanRelease: { const Value *Arg = I->first; const TerminatorInst *TI = cast(&BB->back()); bool SomeSuccHasSame = false; bool AllSuccsHaveSame = true; - PtrState &S = MyStates.getPtrTopDownState(Arg); - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { - PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); - switch (SuccS.GetSeq()) { + PtrState &S = I->second; + succ_const_iterator SI(TI), SE(TI, false); + + // If the terminator is an invoke marked with the + // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be + // ignored, for ARC purposes. + if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + --SE; + + for (; SI != SE; ++SI) { + Sequence SuccSSeq = S_None; + bool SuccSRRIKnownSafe = false; + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + DenseMap::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + SuccSSeq = SuccS.GetSeq(); + SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + switch (SuccSSeq) { case S_None: { - if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { S.ClearSequenceProgress(); + break; + } continue; } case S_CanRelease: @@ -2238,7 +2567,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, case S_Release: case S_MovableRelease: case S_Use: - if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) AllSuccsHaveSame = false; break; case S_Retain: @@ -2250,10 +2579,165 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } } } +bool +ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, + BasicBlock *BB, + MapVector &Retains, + BBState &MyStates) { + bool NestingDetected = false; + InstructionClass Class = GetInstructionClass(Inst); + const Value *Arg = 0; + + switch (Class) { + case IC_Release: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + + // If we see two releases in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second release, which may allow us to + // eliminate the first release too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + NestingDetected = true; + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); + S.RRI.ReleaseMetadata = ReleaseMetadata; + S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); + S.RRI.Calls.insert(Inst); + + S.IncrementNestCount(); + break; + } + case IC_RetainBlock: + // An objc_retainBlock call with just a use may need to be kept, + // because it may be copying a block from the stack to the heap. 
+ if (!IsRetainBlockOptimizable(Inst)) + break; + // FALLTHROUGH + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + S.SetKnownPositiveRefCount(); + S.DecrementNestCount(); + + switch (S.GetSeq()) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_CanRelease: + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + Retains[Inst] = S.RRI; + } + S.ClearSequenceProgress(); + break; + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + return NestingDetected; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearBottomUpPointers(); + return NestingDetected; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + return NestingDetected; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible releases. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + S.ClearRefCount(); + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + assert(S.RRI.ReverseInsertPts.empty()); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa(Inst)) + S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); + else + S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); + S.SetSeq(S_Use); + } else if (Seq == S_Release && + (Class == IC_User || Class == IC_CallOrUser)) { + // Non-movable releases depend on any possible objc pointer use. + S.SetSeq(S_Stop); + assert(S.RRI.ReverseInsertPts.empty()); + // As above; handle invoke specially. + if (isa(Inst)) + S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); + else + S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + return NestingDetected; +} + bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, DenseMap &BBStates, @@ -2263,165 +2747,177 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Merge the states from each successor to compute the initial state // for the current block. 
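  // (This is the bottom-up pass, so dataflow facts flow from a block's
  // successors back into the block itself.)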
- const TerminatorInst *TI = cast(&BB->back()); - succ_const_iterator SI(TI), SE(TI, false); - if (SI == SE) - MyStates.SetAsExit(); - else - do { - const BasicBlock *Succ = *SI++; - if (Succ == BB) - continue; - DenseMap::iterator I = BBStates.find(Succ); - // If we haven't seen this node yet, then we've found a CFG cycle. - // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (I == BBStates.end()) - continue; - MyStates.InitFromSucc(I->second); - while (SI != SE) { - Succ = *SI++; - if (Succ != BB) { - I = BBStates.find(Succ); - if (I != BBStates.end()) - MyStates.MergeSucc(I->second); - } - } - break; - } while (SI != SE); + for (BBState::edge_iterator SI(MyStates.succ_begin()), + SE(MyStates.succ_end()); SI != SE; ++SI) { + const BasicBlock *Succ = *SI; + DenseMap::iterator I = BBStates.find(Succ); + assert(I != BBStates.end()); + MyStates.InitFromSucc(I->second); + ++SI; + for (; SI != SE; ++SI) { + Succ = *SI; + I = BBStates.find(Succ); + assert(I != BBStates.end()); + MyStates.MergeSucc(I->second); + } + break; + } // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { Instruction *Inst = llvm::prior(I); - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - switch (Class) { - case IC_Release: { - Arg = GetObjCArg(Inst); + // Invoke instructions are visited as part of their successors (below). + if (isa(Inst)) + continue; - PtrState &S = MyStates.getPtrBottomUpState(Arg); + NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); + } + + // If there's a predecessor with an invoke, visit the invoke as if it were + // part of this block, since we can't insert code after an invoke in its own + // block, and we don't want to split critical edges. + for (BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + if (InvokeInst *II = dyn_cast(&Pred->back())) + NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); + } - // If we see two releases in a row on the same pointer. If so, make + return NestingDetected; +} + +bool +ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, + DenseMap &Releases, + BBState &MyStates) { + bool NestingDetected = false; + InstructionClass Class = GetInstructionClass(Inst); + const Value *Arg = 0; + + switch (Class) { + case IC_RetainBlock: + // An objc_retainBlock call with just a use may need to be kept, + // because it may be copying a block from the stack to the heap. + if (!IsRetainBlockOptimizable(Inst)) + break; + // FALLTHROUGH + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrTopDownState(Arg); + + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + // If we see two retains in a row on the same pointer. If so, make // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second release, which may allow us to - // eliminate the first release too. + // hopefully eliminated the second retain, which may allow us to + // eliminate the first retain too. // Theoretically we could implement removal of nested retain+release // pairs by making PtrState hold a stack of states, but this is // simple and avoids adding overhead for the non-nested case. 
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + if (S.GetSeq() == S_Retain) NestingDetected = true; - S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); - S.RRI.clear(); - S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); - S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); + S.ResetSequenceProgress(S_Retain); + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + // Don't check S.IsKnownIncremented() here because it's not sufficient. + S.RRI.KnownSafe = S.IsKnownNested(); S.RRI.Calls.insert(Inst); + } - S.IncrementRefCount(); - S.IncrementNestCount(); + S.IncrementNestCount(); + return NestingDetected; + } + case IC_Release: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrTopDownState(Arg); + S.DecrementNestCount(); + + switch (S.GetSeq()) { + case S_Retain: + case S_CanRelease: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_Use: + S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); + Releases[Inst] = S.RRI; + S.ClearSequenceProgress(); break; + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); } - case IC_RetainBlock: - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); + break; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearTopDownPointers(); + return NestingDetected; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + return NestingDetected; + default: + break; + } - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.DecrementRefCount(); - S.SetAtLeastOneRefCount(); - S.DecrementNestCount(); + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), + ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible releases. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + S.ClearRefCount(); + switch (Seq) { + case S_Retain: + S.SetSeq(S_CanRelease); + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); - switch (S.GetSeq()) { - case S_Stop: - case S_Release: - case S_MovableRelease: + // One call can't cause a transition from S_Retain to S_CanRelease + // and S_CanRelease to S_Use. If we've made the first transition, + // we're done. + continue; case S_Use: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH case S_CanRelease: - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - Retains[Inst] = S.RRI; - } - S.ClearSequenceProgress(); - break; case S_None: break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - continue; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearBottomUpPointers(); - continue; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - continue; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. 
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.DecrementRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { + case S_Stop: case S_Release: case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - S.RRI.ReverseInsertPts.clear(); - S.RRI.ReverseInsertPts.insert(Inst); - S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - S.RRI.ReverseInsertPts.clear(); - S.RRI.ReverseInsertPts.insert(Inst); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); + llvm_unreachable("top-down pointer in release state!"); } } + + // Check for possible direct uses. + switch (Seq) { + case S_CanRelease: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_Retain: + case S_Use: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } } return NestingDetected; @@ -2436,167 +2932,117 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Merge the states from each predecessor to compute the initial state // for the current block. - const_pred_iterator PI(BB), PE(BB, false); - if (PI == PE) - MyStates.SetAsEntry(); - else - do { - const BasicBlock *Pred = *PI++; - if (Pred == BB) - continue; - DenseMap::iterator I = BBStates.find(Pred); + for (BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); PI != PE; ++PI) { + const BasicBlock *Pred = *PI; + DenseMap::iterator I = BBStates.find(Pred); + assert(I != BBStates.end()); + MyStates.InitFromPred(I->second); + ++PI; + for (; PI != PE; ++PI) { + Pred = *PI; + I = BBStates.find(Pred); assert(I != BBStates.end()); - // If we haven't seen this node yet, then we've found a CFG cycle. - // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (!I->second.isVisitedTopDown()) - continue; - MyStates.InitFromPred(I->second); - while (PI != PE) { - Pred = *PI++; - if (Pred != BB) { - I = BBStates.find(Pred); - assert(I != BBStates.end()); - if (I->second.isVisitedTopDown()) - MyStates.MergePred(I->second); - } - } - break; - } while (PI != PE); + MyStates.MergePred(I->second); + } + break; + } // Visit all the instructions, top-down. 
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; + NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); + } - switch (Class) { - case IC_RetainBlock: - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); + CheckForCFGHazards(BB, BBStates, MyStates); + return NestingDetected; +} - PtrState &S = MyStates.getPtrTopDownState(Arg); +static void +ComputePostOrders(Function &F, + SmallVectorImpl &PostOrder, + SmallVectorImpl &ReverseCFGPostOrder, + unsigned NoObjCARCExceptionsMDKind, + DenseMap &BBStates) { + /// Visited - The visited set, for doing DFS walks. + SmallPtrSet Visited; - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - // If we see two retains in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second retain, which may allow us to - // eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Retain) - NestingDetected = true; - - S.SetSeq(S_Retain); - S.RRI.clear(); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not - // sufficient. - S.RRI.KnownSafe = S.IsKnownNested(); - S.RRI.Calls.insert(Inst); + // Do DFS, computing the PostOrder. + SmallPtrSet OnStack; + SmallVector, 16> SuccStack; + + // Functions always have exactly one entry block, and we don't have + // any other block that we treat like an entry block. + BasicBlock *EntryBB = &F.getEntryBlock(); + BBState &MyStates = BBStates[EntryBB]; + MyStates.SetAsEntry(); + TerminatorInst *EntryTI = cast(&EntryBB->back()); + SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); + Visited.insert(EntryBB); + OnStack.insert(EntryBB); + do { + dfs_next_succ: + BasicBlock *CurrBB = SuccStack.back().first; + TerminatorInst *TI = cast(&CurrBB->back()); + succ_iterator SE(TI, false); + + // If the terminator is an invoke marked with the + // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be + // ignored, for ARC purposes. 
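+      // (An invoke's unwind destination is its last successor, so trimming
+      // the end of the successor range by one drops exactly that edge.)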
+ if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + --SE; + + while (SuccStack.back().second != SE) { + BasicBlock *SuccBB = *SuccStack.back().second++; + if (Visited.insert(SuccBB)) { + TerminatorInst *TI = cast(&SuccBB->back()); + SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); + BBStates[CurrBB].addSucc(SuccBB); + BBState &SuccStates = BBStates[SuccBB]; + SuccStates.addPred(CurrBB); + OnStack.insert(SuccBB); + goto dfs_next_succ; } - S.SetAtLeastOneRefCount(); - S.IncrementRefCount(); - S.IncrementNestCount(); - continue; + if (!OnStack.count(SuccBB)) { + BBStates[CurrBB].addSucc(SuccBB); + BBStates[SuccBB].addPred(CurrBB); + } } - case IC_Release: { - Arg = GetObjCArg(Inst); + OnStack.erase(CurrBB); + PostOrder.push_back(CurrBB); + SuccStack.pop_back(); + } while (!SuccStack.empty()); - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementRefCount(); - S.DecrementNestCount(); + Visited.clear(); - switch (S.GetSeq()) { - case S_Retain: - case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); - Releases[Inst] = S.RRI; - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - break; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearTopDownPointers(); - continue; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. + // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. + // Functions may have many exits, and there also blocks which we treat + // as exits due to ignored edges. + SmallVector, 16> PredStack; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *ExitBB = I; + BBState &MyStates = BBStates[ExitBB]; + if (!MyStates.isExit()) continue; - default: - break; - } - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.DecrementRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - S.RRI.ReverseInsertPts.clear(); - S.RRI.ReverseInsertPts.insert(Inst); + MyStates.SetAsExit(); - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); + PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); + Visited.insert(ExitBB); + while (!PredStack.empty()) { + reverse_dfs_next_succ: + BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); + while (PredStack.back().second != PE) { + BasicBlock *BB = *PredStack.back().second++; + if (Visited.insert(BB)) { + PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); + goto reverse_dfs_next_succ; } } - - // Check for possible direct uses. 
-      switch (Seq) {
-      case S_CanRelease:
-        if (CanUse(Inst, Ptr, PA, Class))
-          S.SetSeq(S_Use);
-        break;
-      case S_Use:
-      case S_Retain:
-      case S_None:
-        break;
-      case S_Stop:
-      case S_Release:
-      case S_MovableRelease:
-        llvm_unreachable("top-down pointer in release state!");
-      }
+      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
     }
   }
-
-  CheckForCFGHazards(BB, BBStates, MyStates);
-  return NestingDetected;
 }
 
 // Visit - Visit the function both top-down and bottom-up.
@@ -2605,43 +3051,31 @@ ObjCARCOpt::Visit(Function &F,
                   DenseMap<const BasicBlock *, BBState> &BBStates,
                   MapVector<Value *, RRInfo> &Retains,
                   DenseMap<Value *, RRInfo> &Releases) {
-  // Use reverse-postorder on the reverse CFG for bottom-up, because we
-  // magically know that loops will be well behaved, i.e. they won't repeatedly
-  // call retain on a single pointer without doing a release. We can't use
-  // ReversePostOrderTraversal here because we want to walk up from each
-  // function exit point.
-  SmallPtrSet<BasicBlock *, 16> Visited;
-  SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
-  SmallVector<BasicBlock *, 16> Order;
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
-    BasicBlock *BB = I;
-    if (BB->getTerminator()->getNumSuccessors() == 0)
-      Stack.push_back(std::make_pair(BB, pred_begin(BB)));
-  }
-  while (!Stack.empty()) {
-    pred_iterator End = pred_end(Stack.back().first);
-    while (Stack.back().second != End) {
-      BasicBlock *BB = *Stack.back().second++;
-      if (Visited.insert(BB))
-        Stack.push_back(std::make_pair(BB, pred_begin(BB)));
-    }
-    Order.push_back(Stack.pop_back_val().first);
-  }
+
+  // Use reverse-postorder traversals, because we magically know that loops
+  // will be well behaved, i.e. they won't repeatedly call retain on a single
+  // pointer without doing a release. We can't use the
+  // ReversePostOrderTraversal class here because we want the reverse-CFG
+  // postorder to consider each function exit point, and we want to ignore
+  // selected cycle edges.
+  SmallVector<BasicBlock *, 16> PostOrder;
+  SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+                    NoObjCARCExceptionsMDKind,
+                    BBStates);
+
+  // Use reverse-postorder on the reverse CFG for bottom-up.
   bool BottomUpNestingDetected = false;
   for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
-       Order.rbegin(), E = Order.rend(); I != E; ++I) {
-    BasicBlock *BB = *I;
-    BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
-  }
+         ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+       I != E; ++I)
+    BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
 
-  // Use regular reverse-postorder for top-down.
+  // Use reverse-postorder for top-down.
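+  // (Iterating a postorder in reverse yields a reverse-postorder, which
+  // visits each block before any of its successors, ignoring cycle edges.)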
   bool TopDownNestingDetected = false;
-  typedef ReversePostOrderTraversal<Function *> RPOTType;
-  RPOTType RPOT(&F);
-  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
-    BasicBlock *BB = *I;
-    TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
-  }
+  for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+         PostOrder.rbegin(), E = PostOrder.rend();
+       I != E; ++I)
+    TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
 
   return TopDownNestingDetected && BottomUpNestingDetected;
 }
@@ -2669,40 +3103,26 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
                                 getRetainBlockCallee(M) : getRetainCallee(M),
                               MyArg, "", InsertPt);
     Call->setDoesNotThrow();
-    if (!RetainsToMove.IsRetainBlock)
+    if (RetainsToMove.IsRetainBlock)
+      Call->setMetadata(CopyOnEscapeMDKind,
+                        MDNode::get(M->getContext(), ArrayRef<Value *>()));
+    else
       Call->setTailCall();
   }
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        PI = RetainsToMove.ReverseInsertPts.begin(),
        PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
-    Instruction *LastUse = *PI;
-    Instruction *InsertPts[] = { 0, 0, 0 };
-    if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
-      // We can't insert code immediately after an invoke instruction, so
-      // insert code at the beginning of both successor blocks instead.
-      // The invoke's return value isn't available in the unwind block,
-      // but our releases will never depend on it, because they must be
-      // paired with retains from before the invoke.
-      InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
-      InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
-    } else {
-      // Insert code immediately after the last use.
-      InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
-    }
-
-    for (Instruction **I = InsertPts; *I; ++I) {
-      Instruction *InsertPt = *I;
-      Value *MyArg = ArgTy == ParamTy ? Arg :
-                     new BitCastInst(Arg, ParamTy, "", InsertPt);
-      CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
-                                        "", InsertPt);
-      // Attach a clang.imprecise_release metadata tag, if appropriate.
-      if (MDNode *M = ReleasesToMove.ReleaseMetadata)
-        Call->setMetadata(ImpreciseReleaseMDKind, M);
-      Call->setDoesNotThrow();
-      if (ReleasesToMove.IsTailCallRelease)
-        Call->setTailCall();
-    }
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+                                      "", InsertPt);
+    // Attach a clang.imprecise_release metadata tag, if appropriate.
+    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+      Call->setMetadata(ImpreciseReleaseMDKind, M);
+    Call->setDoesNotThrow();
+    if (ReleasesToMove.IsTailCallRelease)
+      Call->setTailCall();
   }
 
   // Delete the original retain and release calls.
@@ -2722,6 +3142,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
   }
 }
 
+/// PerformCodePlacement - Identify pairings between the retains and releases,
+/// and delete and/or move them.
 bool
 ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                                    &BBStates,
@@ -2735,9 +3157,10 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
   SmallVector<Instruction *, 4> NewReleases;
   SmallVector<Instruction *, 8> DeadInsts;
 
+  // Visit each retain.
   for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
-       E = Retains.end(); I != E; ) {
-    Value *V = (I++)->first;
+       E = Retains.end(); I != E; ++I) {
+    Value *V = I->first;
     if (!V) continue; // blotted
 
     Instruction *Retain = cast<Instruction>(V);
@@ -2908,6 +3331,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
 
       // Ok, everything checks out and we're all set. Let's move some code!
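+      // (At this point RetainsToMove and ReleasesToMove hold a matched set
+      // of retain and release calls for Arg; OldCount and NewCount below
+      // measure how many calls the pairing eliminated.)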
       Changed = true;
+      assert(OldCount != 0 && "Unreachable code?");
       AnyPairsCompletelyEliminated = NewCount == 0;
       NumRRs += OldCount - NewCount;
       MoveCalls(Arg, RetainsToMove, ReleasesToMove,
@@ -3048,7 +3472,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
       if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
         for (Value::use_iterator UI = Alloca->use_begin(),
              UE = Alloca->use_end(); UI != UE; ++UI) {
-          Instruction *UserInst = cast<Instruction>(*UI);
+          const Instruction *UserInst = cast<Instruction>(*UI);
           switch (GetBasicInstructionClass(UserInst)) {
           case IC_InitWeak:
           case IC_StoreWeak:
@@ -3062,8 +3486,18 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
         for (Value::use_iterator UI = Alloca->use_begin(),
              UE = Alloca->use_end(); UI != UE; ) {
           CallInst *UserInst = cast<CallInst>(*UI++);
-          if (!UserInst->use_empty())
-            UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+          switch (GetBasicInstructionClass(UserInst)) {
+          case IC_InitWeak:
+          case IC_StoreWeak:
+            // These functions return their second argument.
+            UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+            break;
+          case IC_DestroyWeak:
+            // No return value.
+            break;
+          default:
+            llvm_unreachable("alloca really is used!");
+          }
           UserInst->eraseFromParent();
         }
         Alloca->eraseFromParent();
@@ -3131,8 +3565,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
           dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
         if (!Autorelease)
           goto next_block;
-        InstructionClass AutoreleaseClass =
-          GetBasicInstructionClass(Autorelease);
+        InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
         if (!IsAutorelease(AutoreleaseClass))
           goto next_block;
         if (GetObjCArg(Autorelease) != Arg)
@@ -3170,7 +3603,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
 
           // Check that there is nothing that can affect the reference
           // count between the retain and the call.
-          FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+          // Note that Retain need not be in BB.
+          FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
                            DependingInstructions, Visited, PA);
           if (DependingInstructions.size() != 1)
             goto next_block;
@@ -3207,6 +3641,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   if (!EnableARCOpts)
     return false;
 
+  // If nothing in the Module uses ARC, don't do anything.
   Run = ModuleHasARC(M);
   if (!Run)
     return false;
@@ -3214,10 +3649,14 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   // Identify the imprecise release metadata kind.
   ImpreciseReleaseMDKind =
     M.getContext().getMDKindID("clang.imprecise_release");
+  CopyOnEscapeMDKind =
+    M.getContext().getMDKindID("clang.arc.copy_on_escape");
+  NoObjCARCExceptionsMDKind =
+    M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
 
   // Intuitively, objc_retain and others are nocapture, however in practice
   // they are not, because they return their argument value. And objc_release
-  // calls finalizers.
+  // calls finalizers which can have arbitrary side effects.
 
   // These are initialized lazily.
   RetainRVCallee = 0;
@@ -3269,8 +3708,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
     while (OptimizeSequences(F)) {}
 
   // Optimizations if objc_autorelease is used.
-  if (UsedInThisFunction &
-      ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+  if (UsedInThisFunction & ((1 << IC_Autorelease) |
+                            (1 << IC_AutoreleaseRV)))
     OptimizeReturns(F);
 
   return Changed;
@@ -3315,6 +3754,11 @@ namespace {
     /// RetainRV calls to make the optimization work on targets which need it.
     const MDString *RetainRVMarker;
 
+    /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
+    /// at the end of walking the function we have found no alloca
+    /// instructions, these calls can be marked "tail".
+    SmallPtrSet<CallInst *, 8> StoreStrongCalls;
+
     Constant *getStoreStrongCallee(Module *M);
     Constant *getRetainAutoreleaseCallee(Module *M);
     Constant *getRetainAutoreleaseRVCallee(Module *M);
@@ -3364,13 +3808,11 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
     LLVMContext &C = M->getContext();
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
    Type *I8XX = PointerType::getUnqual(I8X);
-    std::vector<Type *> Params;
-    Params.push_back(I8XX);
-    Params.push_back(I8X);
+    Type *Params[] = { I8XX, I8X };
 
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
-    Attributes.addAttr(1, Attribute::NoCapture);
+    AttrListPtr Attributes = AttrListPtr()
+      .addAttr(~0u, Attribute::NoUnwind)
+      .addAttr(1, Attribute::NoCapture);
 
     StoreStrongCallee =
       M->getOrInsertFunction(
@@ -3385,12 +3827,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
   if (!RetainAutoreleaseCallee) {
     LLVMContext &C = M->getContext();
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<Type *> Params;
-    Params.push_back(I8X);
-    FunctionType *FTy =
-      FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
     RetainAutoreleaseCallee =
       M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
   }
@@ -3401,12 +3840,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
   if (!RetainAutoreleaseRVCallee) {
     LLVMContext &C = M->getContext();
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<Type *> Params;
-    Params.push_back(I8X);
-    FunctionType *FTy =
-      FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes;
-    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
     RetainAutoreleaseRVCallee =
       M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
                              Attributes);
@@ -3414,8 +3850,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
   return RetainAutoreleaseRVCallee;
 }
 
-/// ContractAutorelease - Merge an autorelease with a retain into a fused
-/// call.
+/// ContractAutorelease - Merge an autorelease with a retain into a fused call.
 bool
 ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
                                      InstructionClass Class,
@@ -3470,24 +3905,47 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
 void ObjCARCContract::ContractRelease(Instruction *Release,
                                       inst_iterator &Iter) {
   LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
-  if (!Load || Load->isVolatile()) return;
+  if (!Load || !Load->isSimple()) return;
 
   // For now, require everything to be in one basic block.
   BasicBlock *BB = Release->getParent();
   if (Load->getParent() != BB) return;
 
-  // Walk down to find the store.
+  // Walk down to find the store and the release, which may be in either order.
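+  // Schematically, the shape being contracted is (illustrative IR only):
+  //   %old = load i8** %ptr
+  //   store i8* %new, i8** %ptr
+  //   call void @objc_release(i8* %old)
+  // which, together with the retain of %new, collapses into:
+  //   call void @objc_storeStrong(i8** %ptr, i8* %new)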
   BasicBlock::iterator I = Load, End = BB->end();
   ++I;
   AliasAnalysis::Location Loc = AA->getLocation(Load);
-  while (I != End &&
-         (&*I == Release ||
-          IsRetain(GetBasicInstructionClass(I)) ||
-          !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
-    ++I;
-  StoreInst *Store = dyn_cast<StoreInst>(I);
-  if (!Store || Store->isVolatile()) return;
-  if (Store->getPointerOperand() != Loc.Ptr) return;
+  StoreInst *Store = 0;
+  bool SawRelease = false;
+  for (; !Store || !SawRelease; ++I) {
+    if (I == End)
+      return;
+
+    Instruction *Inst = I;
+    if (Inst == Release) {
+      SawRelease = true;
+      continue;
+    }
+
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    // Unrelated retains are harmless.
+    if (IsRetain(Class))
+      continue;
+
+    if (Store) {
+      // The store is the point where we're going to put the objc_storeStrong,
+      // so make sure there are no uses after it.
+      if (CanUse(Inst, Load, PA, Class))
+        return;
+    } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
+      // We are moving the load down to the store, so check for anything
+      // else which writes to the memory between the load and the store.
+      Store = dyn_cast<StoreInst>(Inst);
+      if (!Store || !Store->isSimple()) return;
+      if (Store->getPointerOperand() != Loc.Ptr) return;
+    }
+  }
 
   Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
 
@@ -3518,6 +3976,11 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
   StoreStrong->setDoesNotThrow();
   StoreStrong->setDebugLoc(Store->getDebugLoc());
 
+  // We can't set the tail flag yet, because we haven't yet determined
+  // whether there are any escaping allocas. Remember this call, so that
+  // we can set the tail flag once we know it's safe.
+  StoreStrongCalls.insert(StoreStrong);
+
   if (&*Iter == Store) ++Iter;
   Store->eraseFromParent();
   Release->eraseFromParent();
@@ -3527,6 +3990,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
 }
 
 bool ObjCARCContract::doInitialization(Module &M) {
+  // If nothing in the Module uses ARC, don't do anything.
   Run = ModuleHasARC(M);
   if (!Run)
     return false;
@@ -3564,6 +4028,14 @@ bool ObjCARCContract::runOnFunction(Function &F) {
 
   PA.setAA(&getAnalysis<AliasAnalysis>());
 
+  // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+  // keyword. Be conservative if the function has variadic arguments.
+  // It seems that functions which "return twice" are also unsafe for the
+  // "tail" argument, because they are setjmp-like calls which may need to
+  // return to an earlier stack state.
+  bool TailOkForStoreStrongs = !F.isVarArg() &&
+                               !F.callsFunctionThatReturnsTwice();
+
   // For ObjC library calls which return their argument, replace uses of the
   // argument with uses of the call return value, if it dominates the use. This
   // reduces register pressure.
@@ -3592,9 +4064,24 @@ bool ObjCARCContract::runOnFunction(Function &F) {
         if (!RetainRVMarker)
           break;
         BasicBlock::iterator BBI = Inst;
-        --BBI;
-        while (isNoopInstruction(BBI)) --BBI;
+        BasicBlock *InstParent = Inst->getParent();
+
+        // Step up to see if the call immediately precedes the RetainRV call.
+        // If it's an invoke, we have to cross a block boundary. And we have
+        // to carefully dodge no-op instructions.
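+        // Schematically, the goal is the layout (illustrative IR only):
+        //   %call = call i8* @foo()
+        //   call void asm sideeffect "<marker>", ""()
+        //   %v = call i8* @objc_retainAutoreleasedReturnValue(i8* %call)
+        // where the marker string is the RetainRVMarker read in
+        // doInitialization.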
+        do {
+          if (&*BBI == InstParent->begin()) {
+            BasicBlock *Pred = InstParent->getSinglePredecessor();
+            if (!Pred)
+              goto decline_rv_optimization;
+            BBI = Pred->getTerminator();
+            break;
+          }
+          --BBI;
+        } while (isNoopInstruction(BBI));
+
         if (&*BBI == GetObjCArg(Inst)) {
+          Changed = true;
           InlineAsm *IA =
             InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
                                              /*isVarArg=*/false),
                            RetainRVMarker->getString(),
                            /*Constraints=*/"", /*hasSideEffects=*/true);
           CallInst::Create(IA, "", Inst);
         }
+      decline_rv_optimization:
         break;
       }
       case IC_InitWeak: {
@@ -3620,6 +4108,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
       case IC_Release:
         ContractRelease(Inst, I);
         continue;
+      case IC_User:
+        // Be conservative if the function has any alloca instructions.
+        // Technically we only care about escaping alloca instructions,
+        // but this is sufficient to handle some interesting cases.
+        if (isa<AllocaInst>(Inst))
+          TailOkForStoreStrongs = false;
+        continue;
       default:
         continue;
       }
@@ -3637,40 +4132,46 @@ bool ObjCARCContract::runOnFunction(Function &F) {
       Use &U = UI.getUse();
      unsigned OperandNo = UI.getOperandNo();
       ++UI; // Increment UI now, because we may unlink its element.
-      if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
-        if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
-          Changed = true;
-          Instruction *Replacement = Inst;
-          Type *UseTy = U.get()->getType();
-          if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
-            // For PHI nodes, insert the bitcast in the predecessor block.
-            unsigned ValNo =
-              PHINode::getIncomingValueNumForOperand(OperandNo);
-            BasicBlock *BB =
-              PHI->getIncomingBlock(ValNo);
-            if (Replacement->getType() != UseTy)
-              Replacement = new BitCastInst(Replacement, UseTy, "",
-                                            &BB->back());
-            for (unsigned i = 0, e = PHI->getNumIncomingValues();
-                 i != e; ++i)
-              if (PHI->getIncomingBlock(i) == BB) {
-                // Keep the UI iterator valid.
-                if (&PHI->getOperandUse(
-                      PHINode::getOperandNumForIncomingValue(i)) ==
-                    &UI.getUse())
-                  ++UI;
-                PHI->setIncomingValue(i, Replacement);
-              }
-          } else {
-            if (Replacement->getType() != UseTy)
-              Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
-            U.set(Replacement);
-          }
+
+      // If the call's return value dominates a use of the call's argument
+      // value, rewrite the use to use the return value. We check for
+      // reachability here because an unreachable call is considered to
+      // trivially dominate itself, which would lead us to rewriting its
+      // argument in terms of its return value, which would lead to
+      // infinite loops in GetObjCArg.
+      if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
+        Changed = true;
+        Instruction *Replacement = Inst;
+        Type *UseTy = U.get()->getType();
+        if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+          // For PHI nodes, insert the bitcast in the predecessor block.
+          unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+          BasicBlock *BB = PHI->getIncomingBlock(ValNo);
+          if (Replacement->getType() != UseTy)
+            Replacement = new BitCastInst(Replacement, UseTy, "",
+                                          &BB->back());
+          // While we're here, rewrite all edges for this PHI, rather
+          // than just one use at a time, to minimize the number of
+          // bitcasts we emit.
+          for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+            if (PHI->getIncomingBlock(i) == BB) {
+              // Keep the UI iterator valid.
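+              // (That is, if this PHI operand is the use UI currently
+              // points at, advance UI before the operand is overwritten.)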
+              if (&PHI->getOperandUse(
+                    PHINode::getOperandNumForIncomingValue(i)) ==
+                  &UI.getUse())
+                ++UI;
+              PHI->setIncomingValue(i, Replacement);
+            }
+        } else {
+          if (Replacement->getType() != UseTy)
+            Replacement = new BitCastInst(Replacement, UseTy, "",
+                                          cast<Instruction>(U.getUser()));
+          U.set(Replacement);
         }
+      }
     }
 
-    // If Arg is a no-op casted pointer, strip one level of casts and
-    // iterate.
+    // If Arg is a no-op casted pointer, strip one level of casts and iterate.
     if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
       Arg = BI->getOperand(0);
    else if (isa<GEPOperator>(Arg) &&
@@ -3684,5 +4185,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
     }
   }
 
+  // If this function has no escaping allocas or suspicious vararg usage,
+  // objc_storeStrong calls can be marked with the "tail" keyword.
+  if (TailOkForStoreStrongs)
+    for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
+         E = StoreStrongCalls.end(); I != E; ++I)
+      (*I)->setTailCall();
+  StoreStrongCalls.clear();
+
   return Changed;
 }
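As an aside for readers following ComputePostOrders above: the same explicit-stack postorder technique, reduced to a minimal, self-contained C++ sketch over a toy adjacency-list graph. Everything here (the names, the toy graph representation) is illustrative and not part of the patch:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Postorder of a toy CFG (adjacency list, node 0 = entry) computed with
    // an explicit successor stack instead of recursion, mirroring the shape
    // of the SuccStack loop in ComputePostOrders.
    static std::vector<std::size_t>
    postOrder(const std::vector<std::vector<std::size_t> > &Succs) {
      std::vector<std::size_t> Order;
      std::vector<bool> Visited(Succs.size(), false);
      // Each entry is (node, index of the next unexplored successor).
      std::vector<std::pair<std::size_t, std::size_t> > Stack;
      Stack.push_back(std::make_pair(0, 0));
      Visited[0] = true;
      while (!Stack.empty()) {
        std::size_t Node = Stack.back().first;
        std::size_t &Next = Stack.back().second;
        if (Next != Succs[Node].size()) {
          std::size_t S = Succs[Node][Next++];
          if (!Visited[S]) {
            Visited[S] = true;
            Stack.push_back(std::make_pair(S, 0)); // descend (dfs_next_succ)
          }
        } else {
          Order.push_back(Node); // all successors finished: emit in postorder
          Stack.pop_back();
        }
      }
      return Order;
    }

For a diamond CFG with edges 0->1, 0->2, 1->3, 2->3 this returns {3, 1, 2, 0}; iterating that vector in reverse yields the reverse-postorder that Visit consumes.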