From eb490a7aa34c873b1bc401eb2c45a4d71a62be37 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Sat, 25 Jul 2009 01:22:26 +0000
Subject: [PATCH] Teach ScalarEvolution to make use of no-overflow flags when
 analyzing add recurrences.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77034 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/ScalarEvolution.cpp     | 39 +++++++++++++++++++++++++--
 test/Analysis/ScalarEvolution/nsw.ll | 40 ++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 test/Analysis/ScalarEvolution/nsw.ll

diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index c77c7c40ef1..49af5793662 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -734,6 +734,13 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
       unsigned BitWidth = getTypeSizeInBits(AR->getType());
       const Loop *L = AR->getLoop();
 
+      // If we have special knowledge that this addrec won't overflow,
+      // we don't need to do any further analysis.
+      if (AR->hasNoUnsignedOverflow())
+        return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                             getZeroExtendExpr(Step, Ty),
+                             L);
+
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -866,6 +873,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
       unsigned BitWidth = getTypeSizeInBits(AR->getType());
       const Loop *L = AR->getLoop();
 
+      // If we have special knowledge that this addrec won't overflow,
+      // we don't need to do any further analysis.
+      if (AR->hasNoSignedOverflow())
+        return getAddRecExpr(getSignExtendExpr(Start, Ty),
+                             getSignExtendExpr(Step, Ty),
+                             L);
+
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -2344,8 +2358,29 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
              cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
           const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
 
-          const SCEV *PHISCEV =
-            getAddRecExpr(StartVal, Accum, L);
+          const SCEVAddRecExpr *PHISCEV =
+            cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
+
+          // If the increment doesn't overflow, then neither the addrec nor the
+          // post-increment will overflow.
+          if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
+            if (OBO->getOperand(0) == PN &&
+                getSCEV(OBO->getOperand(1)) ==
+                  PHISCEV->getStepRecurrence(*this)) {
+              const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
+              if (OBO->hasNoUnsignedOverflow()) {
+                const_cast<SCEVAddRecExpr *>(PHISCEV)
+                  ->setHasNoUnsignedOverflow(true);
+                const_cast<SCEVAddRecExpr *>(PostInc)
+                  ->setHasNoUnsignedOverflow(true);
+              }
+              if (OBO->hasNoSignedOverflow()) {
+                const_cast<SCEVAddRecExpr *>(PHISCEV)
+                  ->setHasNoSignedOverflow(true);
+                const_cast<SCEVAddRecExpr *>(PostInc)
+                  ->setHasNoSignedOverflow(true);
+              }
+            }
 
           // Okay, for the entire analysis of this edge we assumed the PHI
           // to be symbolic. We now need to go back and purge all of the
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
new file mode 100644
index 00000000000..245ed6f36be
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}} | count 8
+
+; The addrecs in this loop are analyzable only by using nsw information.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+define void @foo(double* %p) nounwind {
+entry:
+  %tmp = load double* %p, align 8                   ; <double> [#uses=1]
+  %tmp1 = fcmp ogt double %tmp, 2.000000e+00        ; <i1> [#uses=1]
+  br i1 %tmp1, label %bb.nph, label %return
+
+bb.nph:                                             ; preds = %entry
+  br label %bb
+
+bb:                                                 ; preds = %bb1, %bb.nph
+  %i.01 = phi i32 [ %tmp8, %bb1 ], [ 0, %bb.nph ]   ; <i32> [#uses=3]
+  %tmp2 = sext i32 %i.01 to i64                     ; <i64> [#uses=1]
+  %tmp3 = getelementptr double* %p, i64 %tmp2       ; <double*> [#uses=1]
+  %tmp4 = load double* %tmp3, align 8               ; <double> [#uses=1]
+  %tmp5 = fmul double %tmp4, 9.200000e+00           ; <double> [#uses=1]
+  %tmp6 = sext i32 %i.01 to i64                     ; <i64> [#uses=1]
+  %tmp7 = getelementptr double* %p, i64 %tmp6       ; <double*> [#uses=1]
+  store double %tmp5, double* %tmp7, align 8
+  %tmp8 = nsw add i32 %i.01, 1                      ; <i32> [#uses=2]
+  br label %bb1
+
+bb1:                                                ; preds = %bb
+  %phitmp = sext i32 %tmp8 to i64                   ; <i64> [#uses=1]
+  %tmp9 = getelementptr double* %p, i64 %phitmp     ; <double*> [#uses=1]
+  %tmp10 = load double* %tmp9, align 8              ; <double> [#uses=1]
+  %tmp11 = fcmp ogt double %tmp10, 2.000000e+00     ; <i1> [#uses=1]
+  br i1 %tmp11, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge:                               ; preds = %bb1
+  br label %return
+
+return:                                             ; preds = %bb1.return_crit_edge, %entry
+  ret void
+}
-- 
2.34.1
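
Editorial note, not part of the patch: the ScalarEvolution.cpp hunks above record
the IR-level nuw/nsw flags from the loop increment on the SCEVAddRecExpr (and on
its post-increment form), and getZeroExtendExpr/getSignExtendExpr then use those
bits to fold a zext/sext of {Start,+,Step} directly into {ext(Start),+,ext(Step)}
without reasoning about the trip count. Below is a minimal sketch of how a client
of this 2009-era API might consume the flag; the helper function and its name are
hypothetical, while ScalarEvolution::getSCEV, SCEVAddRecExpr, and
hasNoSignedOverflow are the interfaces that appear in the patch.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Hypothetical helper: returns true when ScalarEvolution has proved, via the
// nsw flag propagated in createNodeForPHI above, that the value's addrec never
// wraps in the signed sense, so sign-extending it keeps it an affine addrec.
static bool stepsWithoutSignedWrap(ScalarEvolution &SE, Value *IndVar) {
  const SCEV *S = SE.getSCEV(IndVar);
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    return AR->hasNoSignedOverflow();
  return false;
}

This is what nsw.ll exercises: each getelementptr index is a sext of %i.01 or
%tmp8, and the RUN line greps for 8 expressions of the form {...,+,...} in the
-scalar-evolution output.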