From: Dan Gohman Date: Wed, 24 Jun 2009 01:18:18 +0000 (+0000) Subject: Extend ScalarEvolution's multiple-exit support to compute exact X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=40a5a1b39ee1cd40ff9d04740386b667fb27b340;p=oota-llvm.git Extend ScalarEvolution's multiple-exit support to compute exact trip counts in more cases. Generalize ScalarEvolution's isLoopGuardedByCond code to recognize And and Or conditions, splitting the code out into an isNecessaryCond helper function so that it can evaluate Ands and Ors recursively, and make SCEVExpander be much more aggressive about hoisting instructions out of loops. test/CodeGen/X86/pr3495.ll has an additional instruction now, but it appears to be due to an arbitrary register allocation difference. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74048 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index cc9f9ccd553..4286ec336be 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -334,6 +334,12 @@ namespace llvm { /// found. BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB); + /// isNecessaryCond - Test whether the given CondValue value is a condition + /// which is at least as strict as the one described by Pred, LHS, and RHS. + bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + bool Inverse); + /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 436b79dc07e..d1f6679a437 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -2813,7 +2813,6 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { const SCEV* BECount = CouldNotCompute; const SCEV* MaxBECount = CouldNotCompute; bool CouldNotComputeBECount = false; - bool CouldNotComputeMaxBECount = false; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BackedgeTakenInfo NewBTI = ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); @@ -2826,25 +2825,13 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { } else if (!CouldNotComputeBECount) { if (BECount == CouldNotCompute) BECount = NewBTI.Exact; - else { - // TODO: More analysis could be done here. For example, a - // loop with a short-circuiting && operator has an exact count - // of the min of both sides. - CouldNotComputeBECount = true; - BECount = CouldNotCompute; - } - } - if (NewBTI.Max == CouldNotCompute) { - // We couldn't compute an maximum value for this exit, so - // we won't be able to compute an maximum value for the loop. - CouldNotComputeMaxBECount = true; - MaxBECount = CouldNotCompute; - } else if (!CouldNotComputeMaxBECount) { - if (MaxBECount == CouldNotCompute) - MaxBECount = NewBTI.Max; else - MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max); + BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact); } + if (MaxBECount == CouldNotCompute) + MaxBECount = NewBTI.Max; + else if (NewBTI.Max != CouldNotCompute) + MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max); } return BackedgeTakenInfo(BECount, MaxBECount); @@ -2925,9 +2912,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB) { - // Check if the controlling expression for this loop is an and or or. In - // such cases, an exact backedge-taken count may be infeasible, but a - // maximum count may still be feasible. + // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. @@ -3899,88 +3884,111 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, LoopEntryPredicate->isUnconditional()) continue; - ICmpInst *ICI = dyn_cast(LoopEntryPredicate->getCondition()); - if (!ICI) continue; + if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, + LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) + return true; + } - // Now that we found a conditional branch that dominates the loop, check to - // see if it is the comparison we are looking for. - Value *PreCondLHS = ICI->getOperand(0); - Value *PreCondRHS = ICI->getOperand(1); - ICmpInst::Predicate Cond; - if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest) - Cond = ICI->getPredicate(); - else - Cond = ICI->getInversePredicate(); + return false; +} - if (Cond == Pred) - ; // An exact match. - else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE) - ; // The actual condition is beyond sufficient. - else - // Check a few special cases. - switch (Cond) { - case ICmpInst::ICMP_UGT: - if (Pred == ICmpInst::ICMP_ULT) { - std::swap(PreCondLHS, PreCondRHS); - Cond = ICmpInst::ICMP_ULT; - break; - } - continue; - case ICmpInst::ICMP_SGT: - if (Pred == ICmpInst::ICMP_SLT) { - std::swap(PreCondLHS, PreCondRHS); - Cond = ICmpInst::ICMP_SLT; +/// isNecessaryCond - Test whether the given CondValue value is a condition +/// which is at least as strict as the one described by Pred, LHS, and RHS. +bool ScalarEvolution::isNecessaryCond(Value *CondValue, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + bool Inverse) { + // Recursivly handle And and Or conditions. + if (BinaryOperator *BO = dyn_cast(CondValue)) { + if (BO->getOpcode() == Instruction::And) { + if (!Inverse) + return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || + isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + } else if (BO->getOpcode() == Instruction::Or) { + if (Inverse) + return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || + isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + } + } + + ICmpInst *ICI = dyn_cast(CondValue); + if (!ICI) return false; + + // Now that we found a conditional branch that dominates the loop, check to + // see if it is the comparison we are looking for. + Value *PreCondLHS = ICI->getOperand(0); + Value *PreCondRHS = ICI->getOperand(1); + ICmpInst::Predicate Cond; + if (Inverse) + Cond = ICI->getInversePredicate(); + else + Cond = ICI->getPredicate(); + + if (Cond == Pred) + ; // An exact match. + else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE) + ; // The actual condition is beyond sufficient. + else + // Check a few special cases. + switch (Cond) { + case ICmpInst::ICMP_UGT: + if (Pred == ICmpInst::ICMP_ULT) { + std::swap(PreCondLHS, PreCondRHS); + Cond = ICmpInst::ICMP_ULT; + break; + } + return false; + case ICmpInst::ICMP_SGT: + if (Pred == ICmpInst::ICMP_SLT) { + std::swap(PreCondLHS, PreCondRHS); + Cond = ICmpInst::ICMP_SLT; + break; + } + return false; + case ICmpInst::ICMP_NE: + // Expressions like (x >u 0) are often canonicalized to (x != 0), + // so check for this case by checking if the NE is comparing against + // a minimum or maximum constant. + if (!ICmpInst::isTrueWhenEqual(Pred)) + if (ConstantInt *CI = dyn_cast(PreCondRHS)) { + const APInt &A = CI->getValue(); + switch (Pred) { + case ICmpInst::ICMP_SLT: + if (A.isMaxSignedValue()) break; + return false; + case ICmpInst::ICMP_SGT: + if (A.isMinSignedValue()) break; + return false; + case ICmpInst::ICMP_ULT: + if (A.isMaxValue()) break; + return false; + case ICmpInst::ICMP_UGT: + if (A.isMinValue()) break; + return false; + default: + return false; + } + Cond = ICmpInst::ICMP_NE; + // NE is symmetric but the original comparison may not be. Swap + // the operands if necessary so that they match below. + if (isa(LHS)) + std::swap(PreCondLHS, PreCondRHS); break; } - continue; - case ICmpInst::ICMP_NE: - // Expressions like (x >u 0) are often canonicalized to (x != 0), - // so check for this case by checking if the NE is comparing against - // a minimum or maximum constant. - if (!ICmpInst::isTrueWhenEqual(Pred)) - if (ConstantInt *CI = dyn_cast(PreCondRHS)) { - const APInt &A = CI->getValue(); - switch (Pred) { - case ICmpInst::ICMP_SLT: - if (A.isMaxSignedValue()) break; - continue; - case ICmpInst::ICMP_SGT: - if (A.isMinSignedValue()) break; - continue; - case ICmpInst::ICMP_ULT: - if (A.isMaxValue()) break; - continue; - case ICmpInst::ICMP_UGT: - if (A.isMinValue()) break; - continue; - default: - continue; - } - Cond = ICmpInst::ICMP_NE; - // NE is symmetric but the original comparison may not be. Swap - // the operands if necessary so that they match below. - if (isa(LHS)) - std::swap(PreCondLHS, PreCondRHS); - break; - } - continue; - default: - // We weren't able to reconcile the condition. - continue; - } - - if (!PreCondLHS->getType()->isInteger()) continue; + return false; + default: + // We weren't able to reconcile the condition. + return false; + } - const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS); - const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS); - if ((HasSameValue(LHS, PreCondLHSSCEV) && - HasSameValue(RHS, PreCondRHSSCEV)) || - (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && - HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)))) - return true; - } + if (!PreCondLHS->getType()->isInteger()) return false; - return false; + const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS); + const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS); + return (HasSameValue(LHS, PreCondLHSSCEV) && + HasSameValue(RHS, PreCondRHSSCEV)) || + (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && + HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))); } /// getBECount - Subtract the end and start values and divide by the step, diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index c5591d70273..6d7abc02ebe 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -51,21 +51,26 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, if (Argument *A = dyn_cast(V)) { // Check to see if there is already a cast! for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { + UI != E; ++UI) if ((*UI)->getType() == Ty) if (CastInst *CI = dyn_cast(cast(*UI))) if (CI->getOpcode() == opcode) { // If the cast isn't the first instruction of the function, move it. - if (BasicBlock::iterator(CI) != + if (BasicBlock::iterator(CI) != A->getParent()->getEntryBlock().begin()) { - // If the CastInst is the insert point, change the insert point. - if (CI == InsertPt) ++InsertPt; - // Splice the cast at the beginning of the entry block. - CI->moveBefore(A->getParent()->getEntryBlock().begin()); + // Recreate the cast at the beginning of the entry block. + // The old cast is left in place in case it is being used + // as an insert point. + Instruction *NewCI = + CastInst::Create(opcode, V, Ty, "", + A->getParent()->getEntryBlock().begin()); + NewCI->takeName(CI); + CI->replaceAllUsesWith(NewCI); + return NewCI; } return CI; } - } + Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(), A->getParent()->getEntryBlock().begin()); InsertedValues.insert(I); @@ -85,10 +90,13 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, It = cast(I)->getNormalDest()->begin(); while (isa(It)) ++It; if (It != BasicBlock::iterator(CI)) { - // If the CastInst is the insert point, change the insert point. - if (CI == InsertPt) ++InsertPt; - // Splice the cast immediately after the operand in question. - CI->moveBefore(It); + // Recreate the cast at the beginning of the entry block. + // The old cast is left in place in case it is being used + // as an insert point. + Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It); + NewCI->takeName(CI); + CI->replaceAllUsesWith(NewCI); + return NewCI; } return CI; } @@ -497,8 +505,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } } - Value *RestV = expand(Rest); - return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV))); + // Just do a normal add. Pre-expand the operands to suppress folding. + return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())), + SE.getUnknown(expand(Rest)))); } // {0,+,1} --> Insert a canonical induction variable into the loop! @@ -546,36 +555,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { getOrInsertCanonicalInductionVariable(L, Ty); // If this is a simple linear addrec, emit it now as a special case. - if (S->isAffine()) { // {0,+,F} --> i*F - Value *F = expandCodeFor(S->getOperand(1), Ty); - - // If the insert point is directly inside of the loop, emit the multiply at - // the insert point. Otherwise, L is a loop that is a parent of the insert - // point loop. If we can, move the multiply to the outer most loop that it - // is safe to be in. - BasicBlock::iterator MulInsertPt = getInsertionPoint(); - Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent()); - if (InsertPtLoop != L && InsertPtLoop && - L->contains(InsertPtLoop->getHeader())) { - do { - // If we cannot hoist the multiply out of this loop, don't. - if (!InsertPtLoop->isLoopInvariant(F)) break; - - BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader(); - - // If this loop hasn't got a preheader, we aren't able to hoist the - // multiply. - if (!InsertPtLoopPH) - break; - - // Otherwise, move the insert point to the preheader. - MulInsertPt = InsertPtLoopPH->getTerminator(); - InsertPtLoop = InsertPtLoop->getParentLoop(); - } while (InsertPtLoop != L); - } - - return InsertBinop(Instruction::Mul, I, F, MulInsertPt); - } + if (S->isAffine()) // {0,+,F} --> i*F + return + expand(SE.getTruncateOrNoop( + SE.getMulExpr(SE.getUnknown(I), + SE.getNoopOrAnyExtend(S->getOperand(1), + I->getType())), + Ty)); // If this is a chain of recurrences, turn it into a closed form, using the // folders, then expandCodeFor the closed form. This allows the folders to @@ -671,8 +657,31 @@ Value *SCEVExpander::expand(const SCEV *S) { InsertedExpressions.find(S); if (I != InsertedExpressions.end()) return I->second; - + + // Compute an insertion point for this SCEV object. Hoist the instructions + // as far out in the loop nest as possible. + BasicBlock::iterator InsertPt = getInsertionPoint(); + BasicBlock::iterator SaveInsertPt = InsertPt; + for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ; + L = L->getParentLoop()) + if (S->isLoopInvariant(L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && S->hasComputableLoopEvolution(L)) + InsertPt = L->getHeader()->getFirstNonPHI(); + while (isInsertedInstruction(InsertPt)) ++InsertPt; + break; + } + setInsertionPoint(InsertPt); + Value *V = visit(S); + + setInsertionPoint(SaveInsertPt); InsertedExpressions[S] = V; return V; } @@ -686,6 +695,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isInteger() && "Can only insert integer induction variables!"); const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), - SE.getIntegerSCEV(1, Ty), L); - return expand(H); + SE.getIntegerSCEV(1, Ty), L); + BasicBlock::iterator SaveInsertPt = getInsertionPoint(); + Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); + setInsertionPoint(SaveInsertPt); + return V; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6c20e7d1407..658b788e0ba 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -104,7 +104,8 @@ namespace { void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount); void RewriteIVExpressions(Loop *L, const Type *LargestType, - SCEVExpander &Rewriter); + SCEVExpander &Rewriter, + BasicBlock::iterator InsertPt); void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter); @@ -170,6 +171,8 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, } // Expand the code for the iteration count into the preheader of the loop. + assert(RHS->isLoopInvariant(L) && + "Computed iteration count is not loop invariant!"); BasicBlock *Preheader = L->getLoopPreheader(); Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), Preheader->getTerminator()); @@ -434,10 +437,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ExitingBlock, BI, Rewriter); } - Rewriter.setInsertionPoint(Header->getFirstNonPHI()); + BasicBlock::iterator InsertPt = Header->getFirstNonPHI(); // Rewrite IV-derived expressions. Clears the rewriter cache. - RewriteIVExpressions(L, LargestType, Rewriter); + RewriteIVExpressions(L, LargestType, Rewriter, InsertPt); // The Rewriter may only be used for isInsertedInstruction queries from this // point on. @@ -462,7 +465,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { } void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, - SCEVExpander &Rewriter) { + SCEVExpander &Rewriter, + BasicBlock::iterator InsertPt) { SmallVector DeadInsts; // Rewrite all induction variable expressions in terms of the canonical @@ -488,29 +492,17 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // Compute the final addrec to expand into code. const SCEV* AR = IU->getReplacementExpr(*UI); - Value *NewVal = 0; - if (AR->isLoopInvariant(L)) { - BasicBlock::iterator I = Rewriter.getInsertionPoint(); - // Expand loop-invariant values in the loop preheader. They will - // be sunk to the exit block later, if possible. - NewVal = - Rewriter.expandCodeFor(AR, UseTy, - L->getLoopPreheader()->getTerminator()); - Rewriter.setInsertionPoint(I); - ++NumReplaced; - } else { - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. - if (!Stride->isLoopInvariant(L)) - continue; - - // Now expand it into actual Instructions and patch it into place. - NewVal = Rewriter.expandCodeFor(AR, UseTy); - } + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + continue; + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); // Patch the new value into place. if (Op->hasName()) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 312b957948b..a877c4ea03c 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -409,16 +409,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, const SCEV* NewValSCEV = SE->getUnknown(Base); - // If there is no immediate value, skip the next part. - if (!Imm->isZero()) { - // If we are inserting the base and imm values in the same block, make sure - // to adjust the IP position if insertion reused a result. - if (IP == BaseInsertPt) - IP = Rewriter.getInsertionPoint(); - - // Always emit the immediate (if non-zero) into the same block as the user. - NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); - } + // Always emit the immediate into the same block as the user. + NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll index 62382c6d78c..ca6204c101e 100644 --- a/test/CodeGen/X86/pr3495.ll +++ b/test/CodeGen/X86/pr3495.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2 ; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies} -; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38 +; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39 ; PR3495 ; The loop reversal kicks in once here, resulting in one fewer instruction.