From: Dan Gohman Date: Fri, 9 Apr 2010 22:07:05 +0000 (+0000) Subject: When determining a canonical insert position, don't climb deeper X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e5f76877aee6f33964de105893f0ef338661ecad;p=oota-llvm.git When determining a canonical insert position, don't climb deeper into adjacent loops. Also, ensure that the insert position is dominated by the loop latch of any loop in the post-inc set which has a latch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100906 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 04f388477bc..7882a9aefa2 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1157,6 +1157,7 @@ class LSRInstance { IVUsers &IU; ScalarEvolution &SE; DominatorTree &DT; + LoopInfo &LI; const TargetLowering *const TLI; Loop *const L; bool Changed; @@ -1236,9 +1237,12 @@ public: DenseSet &VisitedRegs) const; void Solve(SmallVectorImpl &Solution) const; - BasicBlock::iterator AdjustInputPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU) const; + BasicBlock::iterator + HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl &Inputs) const; + BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const; Value *Expand(const LSRFixup &LF, const Formula &F, @@ -2813,12 +2817,64 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { return Node->getBlock(); } -/// AdjustInputPositionForExpand - Determine an input position which will be +/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up +/// the dominator tree far as we can go while still being dominated by the +/// input positions. This helps canonicalize the insert position, which +/// encourages sharing. +BasicBlock::iterator +LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl &Inputs) + const { + for (;;) { + const Loop *IPLoop = LI.getLoopFor(IP->getParent()); + unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; + + BasicBlock *IDom; + for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) { + IDom = getImmediateDominator(Rung, DT); + if (!IDom) return IP; + + // Don't climb into a loop though. + const Loop *IDomLoop = LI.getLoopFor(IDom); + unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; + if (IDomDepth <= IPLoopDepth && + (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) + break; + } + + bool AllDominate = true; + Instruction *BetterPos = 0; + Instruction *Tentative = IDom->getTerminator(); + for (SmallVectorImpl::const_iterator I = Inputs.begin(), + E = Inputs.end(); I != E; ++I) { + Instruction *Inst = *I; + if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { + AllDominate = false; + break; + } + // Attempt to find an insert position in the middle of the block, + // instead of at the end, so that it can be used for other expansions. + if (IDom == Inst->getParent() && + (!BetterPos || DT.dominates(BetterPos, Inst))) + BetterPos = next(BasicBlock::iterator(Inst)); + } + if (!AllDominate) + break; + if (BetterPos) + IP = BetterPos; + else + IP = Tentative; + } + + return IP; +} + +/// AdjustInsertPositionForExpand - Determine an input position which will be /// dominated by the operands and which will dominate the result. BasicBlock::iterator -LSRInstance::AdjustInputPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU) const { +LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const { // Collect some instructions which must be dominated by the // expanding replacement. These must be dominated by any operands that // will be required in the expansion. @@ -2842,6 +2898,7 @@ LSRInstance::AdjustInputPositionForExpand(BasicBlock::iterator IP, const Loop *PIL = *I; if (PIL == L) continue; + // Be dominated by the loop exit. SmallVector ExitingBlocks; PIL->getExitingBlocks(ExitingBlocks); if (!ExitingBlocks.empty()) { @@ -2850,34 +2907,15 @@ LSRInstance::AdjustInputPositionForExpand(BasicBlock::iterator IP, BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]); Inputs.push_back(BB->getTerminator()); } + + // Be dominated by the loop latch, if it's unique. + if (BasicBlock *Latch = PIL->getLoopLatch()) + Inputs.push_back(prior(BasicBlock::iterator(Latch->getTerminator()))); } // Then, climb up the immediate dominator tree as far as we can go while // still being dominated by the input positions. - for (;;) { - bool AllDominate = true; - Instruction *BetterPos = 0; - BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT); - if (!IDom) break; - Instruction *Tentative = IDom->getTerminator(); - for (SmallVectorImpl::const_iterator I = Inputs.begin(), - E = Inputs.end(); I != E; ++I) { - Instruction *Inst = *I; - if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { - AllDominate = false; - break; - } - if (IDom == Inst->getParent() && - (!BetterPos || DT.dominates(BetterPos, Inst))) - BetterPos = next(BasicBlock::iterator(Inst)); - } - if (!AllDominate) - break; - if (BetterPos) - IP = BetterPos; - else - IP = Tentative; - } + IP = HoistInsertPosition(IP, Inputs); // Don't insert instructions before PHI nodes. while (isa(IP)) ++IP; @@ -2897,7 +2935,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Determine an input position which will be dominated by the operands and // which will dominate the result. - IP = AdjustInputPositionForExpand(IP, LF, LU); + IP = AdjustInsertPositionForExpand(IP, LF, LU); // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. @@ -3175,6 +3213,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) : IU(P->getAnalysis()), SE(P->getAnalysis()), DT(P->getAnalysis()), + LI(P->getAnalysis()), TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. @@ -3331,9 +3370,10 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved(); AU.addPreserved("domfrontier"); + AU.addRequired(); + AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); AU.addPreserved(); diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll index e72364f0359..dca97e9ad18 100644 --- a/test/Transforms/LoopStrengthReduce/uglygep.ll +++ b/test/Transforms/LoopStrengthReduce/uglygep.ll @@ -35,3 +35,33 @@ bb14: ; preds = %bb14, %bb10 store i8 undef, i8* %t9 br label %bb14 } + +define fastcc void @TransformLine() nounwind { +bb: + br label %loop0 + +loop0: ; preds = %loop0, %bb + %i0 = phi i32 [ %i0.next, %loop0 ], [ 0, %bb ] ; [#uses=2] + %i0.next = add i32 %i0, 1 ; [#uses=1] + br i1 false, label %loop0, label %bb0 + +bb0: ; preds = %loop0 + br label %loop1 + +loop1: ; preds = %bb5, %bb0 + %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ] ; [#uses=4] + %t0 = add i32 %i0, %i1 ; [#uses=1] + br i1 false, label %bb2, label %bb6 + +bb2: ; preds = %loop1 + br i1 true, label %bb6, label %bb5 + +bb5: ; preds = %bb2 + %i1.next = add i32 %i1, 1 ; [#uses=1] + br i1 true, label %bb6, label %loop1 + +bb6: ; preds = %bb5, %bb2, %loop1 + %p8 = phi i32 [ %t0, %bb5 ], [ undef, %loop1 ], [ undef, %bb2 ] ; [#uses=0] + %p9 = phi i32 [ undef, %bb5 ], [ %i1, %loop1 ], [ %i1, %bb2 ] ; [#uses=0] + unreachable +}