From: Dan Gohman
Date: Fri, 9 Mar 2012 18:50:52 +0000 (+0000)
Subject: When identifying exit nodes for the reverse-CFG reverse-post-order
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5992f67e683b665392f45b167fe5c9abd91455c9;p=oota-llvm.git

When identifying exit nodes for the reverse-CFG reverse-post-order
traversal, consider nodes for which the only successors are backedges
which the traversal is ignoring to be exit nodes. This fixes a problem
where the bottom-up traversal was failing to visit split blocks along
split loop backedges.

This fixes rdar://10989035.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152421 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 1c7f036232e..91dc23c56a8 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -2929,11 +2929,17 @@ ComputePostOrders(Function &F,
   Visited.clear();
 
   // Compute the exits, which are the starting points for reverse-CFG DFS.
+  // This includes blocks where all the successors are backedges that
+  // we're skipping.
   SmallVector<BasicBlock *, 4> Exits;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
     BasicBlock *BB = I;
-    if (cast<TerminatorInst>(&BB->back())->getNumSuccessors() == 0)
-      Exits.push_back(BB);
+    TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
+      if (!Backedges.count(std::make_pair(BB, *SI)))
+        goto HasNonBackedgeSucc;
+    Exits.push_back(BB);
+  HasNonBackedgeSucc:;
   }
 
   // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 552f4e0b96c..08bd8c0601f 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -1721,6 +1721,82 @@ define void @test61() {
   ret void
 }
 
+; Delete a retain matched by releases when one is inside the loop and the
+; other is outside the loop.
+
+; CHECK: define void @test62(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test62(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  call void @objc_release(i8* %x)
+  br label %loop
+
+exit:
+  call void @objc_release(i8* %x)
+  ret void
+}
+
+; Like test62 but with no release in exit.
+; Don't delete anything!
+
+; CHECK: define void @test63(
+; CHECK: loop:
+; CHECK: tail call i8* @objc_retain(i8* %x)
+; CHECK: loop.more:
+; CHECK: call void @objc_release(i8* %x)
+; CHECK: }
+define void @test63(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  call void @objc_release(i8* %x)
+  br label %loop
+
+exit:
+  ret void
+}
+
+; Like test62 but with no release in loop.more.
+; Don't delete anything!
+
+; CHECK: define void @test64(
+; CHECK: loop:
+; CHECK: tail call i8* @objc_retain(i8* %x)
+; CHECK: exit:
+; CHECK: call void @objc_release(i8* %x)
+; CHECK: }
+define void @test64(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  br label %loop
+
+exit:
+  call void @objc_release(i8* %x)
+  ret void
+}
+
 declare void @bar(i32 ()*)
 
 ; A few real-world testcases.
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 9eada8a2ddb..a618a21d8bb 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -484,12 +484,14 @@ forcoll.empty:
   ret void
 }
 
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test9b for the same testcase without a split backedge.
 
 ; CHECK: define void @test9(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: call i8* @objc_retain
 ; CHECK: }
 define void @test9() nounwind {
 entry:
@@ -551,13 +553,79 @@ forcoll.empty:
   ret void
 }
 
-; Delete a nested retain+release pair.
+; Like test9, but without a split backedge. This we can optimize.
 
-; CHECK: define void @test10(
+; CHECK: define void @test9b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
+define void @test9b() nounwind {
+entry:
+  %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+  %items.ptr = alloca [16 x i8*], align 8
+  %call = call i8* @returner()
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  %call1 = call i8* @returner()
+  %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+  %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  %2 = call i8* @objc_retain(i8* %0) nounwind
+  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %iszero = icmp eq i64 %call4, 0
+  br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+  %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+  %mutationsptr = load i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+  %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+  %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+  %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+  br label %forcoll.loopbody
+
+forcoll.loopbody:
+  %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64* %mutationsptr5, align 8
+  %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+  br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+  call void @objc_enumerationMutation(i8* %2)
+  br label %forcoll.notmutated
+
+forcoll.notmutated:
+  %phitmp = add i64 %forcoll.index, 1
+  %exitcond = icmp eq i64 %phitmp, %umax
+  br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %4 = icmp eq i64 %call7, 0
+  br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+  call void @objc_release(i8* %2) nounwind
+  call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test10b for the same testcase without a split backedge.
+
+; CHECK: define void @test10(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: }
 define void @test10() nounwind {
 entry:
   %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
@@ -618,3 +686,68 @@ forcoll.empty:
   call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; Like test10, but without a split backedge. This we can optimize.
+
+; CHECK: define void @test10b(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: @objc_retain
+; CHECK: }
+define void @test10b() nounwind {
+entry:
+  %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+  %items.ptr = alloca [16 x i8*], align 8
+  %call = call i8* @returner()
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  %call1 = call i8* @returner()
+  %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+  call void @callee()
+  %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  %2 = call i8* @objc_retain(i8* %0) nounwind
+  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %iszero = icmp eq i64 %call4, 0
+  br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+  %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+  %mutationsptr = load i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+  %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+  %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+  %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+  br label %forcoll.loopbody
+
+forcoll.loopbody:
+  %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64* %mutationsptr5, align 8
+  %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+  br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+  call void @objc_enumerationMutation(i8* %2)
+  br label %forcoll.notmutated
+
+forcoll.notmutated:
+  %phitmp = add i64 %forcoll.index, 1
+  %exitcond = icmp eq i64 %phitmp, %umax
+  br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %4 = icmp eq i64 %call7, 0
+  br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+  call void @objc_release(i8* %2) nounwind
+  call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+  ret void
+}
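
As a side note for readers of the patch: the following is a small, self-contained C++ sketch of the exit-node rule the ObjCARC.cpp hunk implements. It is not part of the change and uses no LLVM APIs; the toy CFG, the block names, and the precomputed Backedges set are illustrative assumptions. It demonstrates that once a traversal skips backedges, a block whose successors are all skipped backedges (such as a split-backedge block) must still be seeded as an exit, or a reverse-CFG walk never reaches it.

// Standalone sketch (illustrative only; not LLVM code). Shows why blocks whose
// only successors are skipped backedges must be treated as exits when seeding
// a reverse-CFG walk.
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  using Block = std::string;
  using Edge = std::pair<Block, Block>;

  // Toy CFG with a split loop backedge:
  //   entry -> loop -> {loop.more, exit}, loop.more -> loop.backedge -> loop
  std::map<Block, std::vector<Block>> Succs = {
      {"entry", {"loop"}},
      {"loop", {"loop.more", "exit"}},
      {"loop.more", {"loop.backedge"}},
      {"loop.backedge", {"loop"}}, // its only successor is a backedge
      {"exit", {}}};

  // Backedges, assumed to have been found by an earlier forward DFS.
  std::set<Edge> Backedges = {{"loop.backedge", "loop"}};

  // Old rule: a block is an exit only if it has no successors at all.
  // New rule: a block is also an exit if every successor edge is a backedge
  // the traversal ignores; otherwise the bottom-up walk never visits it.
  std::vector<Block> Exits;
  for (const auto &Entry : Succs) {
    bool HasNonBackedgeSucc = false;
    for (const Block &Succ : Entry.second)
      if (!Backedges.count({Entry.first, Succ}))
        HasNonBackedgeSucc = true;
    if (!HasNonBackedgeSucc)
      Exits.push_back(Entry.first);
  }

  // Prints "exit" and "loop.backedge": both become seeds for the reverse-CFG
  // reverse-post-order traversal.
  for (const Block &B : Exits)
    std::printf("exit node: %s\n", B.c_str());
  return 0;
}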