From f5a309e989b8d2199cb542793e9edf48395d9fed Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 18 Feb 2009 17:22:41 +0000 Subject: [PATCH] Use a sign-extend instead of a zero-extend when promoting a trip count value when the original loop iteration condition is signed and the canonical induction variable won't undergo signed overflow. This isn't required for correctness; it just preserves more information about original loop iteration values. Add a getTruncateOrSignExtend method to ScalarEvolution, following getTruncateOrZeroExtend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@64918 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ScalarEvolution.h | 5 ++ lib/Analysis/ScalarEvolution.cpp | 15 ++++++ lib/Transforms/Scalar/IndVarSimplify.cpp | 54 +++++++++++++------ .../IndVarSimplify/preserve-signed-wrap.ll | 3 +- .../promote-iv-to-eliminate-casts.ll | 7 +-- .../IndVarSimplify/signed-trip-count.ll | 31 +++++++++++ 6 files changed, 96 insertions(+), 19 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/signed-trip-count.ll diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 963117fdd56..b001077ca7f 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -264,6 +264,11 @@ namespace llvm { /// extended, it is zero extended. SCEVHandle getTruncateOrZeroExtend(const SCEVHandle &V, const Type *Ty); + /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion + /// of the input value to the specified type. If the type must be + /// extended, it is sign extended. + SCEVHandle getTruncateOrSignExtend(const SCEVHandle &V, const Type *Ty); + /// getIntegerSCEV - Given an integer or FP type, create a constant for the /// specified signed integer value and return a SCEV for the constant. SCEVHandle getIntegerSCEV(int Val, const Type *Ty); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 54f27653542..98668493434 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -755,6 +755,21 @@ SCEVHandle ScalarEvolution::getTruncateOrZeroExtend(const SCEVHandle &V, return getZeroExtendExpr(V, Ty); } +/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion +/// of the input value to the specified type. If the type must be +/// extended, it is sign extended. +SCEVHandle ScalarEvolution::getTruncateOrSignExtend(const SCEVHandle &V, + const Type *Ty) { + const Type *SrcTy = V->getType(); + assert(SrcTy->isInteger() && Ty->isInteger() && + "Cannot truncate or sign extend with non-integer arguments!"); + if (SrcTy->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + return V; // No conversion + if (SrcTy->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()) + return getTruncateExpr(V, Ty); + return getSignExtendExpr(V, Ty); +} + // get - Get a canonical add expression, or something simpler if possible. SCEVHandle ScalarEvolution::getAddExpr(std::vector &Ops) { assert(!Ops.empty() && "Cannot get empty add!"); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index e31b514b2fa..3e9615c849a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -96,7 +96,8 @@ namespace { Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, - SCEVExpander &Rewriter); + SCEVExpander &Rewriter, + bool SignExtendTripCount); void RewriteLoopExitValues(Loop *L, SCEV *IterationCount); void DeleteTriviallyDeadInstructions(SmallPtrSet &Insts); @@ -235,7 +236,8 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, - SCEVExpander &Rewriter) { + SCEVExpander &Rewriter, + bool SignExtendTripCount) { // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. @@ -253,11 +255,18 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, if ((isa(N) && !N->isZero()) || SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { // No overflow. Cast the sum. - IterationCount = SE->getTruncateOrZeroExtend(N, IndVar->getType()); + if (SignExtendTripCount) + IterationCount = SE->getTruncateOrSignExtend(N, IndVar->getType()); + else + IterationCount = SE->getTruncateOrZeroExtend(N, IndVar->getType()); } else { // Potential overflow. Cast before doing the add. - IterationCount = SE->getTruncateOrZeroExtend(IterationCount, - IndVar->getType()); + if (SignExtendTripCount) + IterationCount = SE->getTruncateOrSignExtend(IterationCount, + IndVar->getType()); + else + IterationCount = SE->getTruncateOrZeroExtend(IterationCount, + IndVar->getType()); IterationCount = SE->getAddExpr(IterationCount, SE->getIntegerSCEV(1, IndVar->getType())); @@ -269,8 +278,12 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, CmpIndVar = L->getCanonicalInductionVariableIncrement(); } else { // We have to use the preincremented value... - IterationCount = SE->getTruncateOrZeroExtend(IterationCount, - IndVar->getType()); + if (SignExtendTripCount) + IterationCount = SE->getTruncateOrSignExtend(IterationCount, + IndVar->getType()); + else + IterationCount = SE->getTruncateOrZeroExtend(IterationCount, + IndVar->getType()); CmpIndVar = IndVar; } @@ -464,10 +477,13 @@ static const Type *getEffectiveIndvarType(const PHINode *Phi) { /// TestOrigIVForWrap - Analyze the original induction variable /// that controls the loop's iteration to determine whether it -/// would ever undergo signed or unsigned overflow. +/// would ever undergo signed or unsigned overflow. Also, check +/// whether an induction variable in the same type that starts +/// at 0 would undergo signed overflow. /// -/// In addition to setting the NoSignedWrap and NoUnsignedWrap -/// variables, return the PHI for this induction variable. +/// In addition to setting the NoSignedWrap, NoUnsignedWrap, and +/// SignExtendTripCount variables, return the PHI for this induction +/// variable. /// /// TODO: This duplicates a fair amount of ScalarEvolution logic. /// Perhaps this can be merged with ScalarEvolution::getIterationCount @@ -477,7 +493,8 @@ static const PHINode *TestOrigIVForWrap(const Loop *L, const BranchInst *BI, const Instruction *OrigCond, bool &NoSignedWrap, - bool &NoUnsignedWrap) { + bool &NoUnsignedWrap, + bool &SignExtendTripCount) { // Verify that the loop is sane and find the exit condition. const ICmpInst *Cmp = dyn_cast(OrigCond); if (!Cmp) return 0; @@ -590,9 +607,13 @@ static const PHINode *TestOrigIVForWrap(const Loop *L, // The original induction variable will start at some non-max value, // it counts up by one, and the loop iterates only while it remans // less than some value in the same type. As such, it will never wrap. - if (isSigned && !InitialVal->getValue().isMaxSignedValue()) + if (isSigned && !InitialVal->getValue().isMaxSignedValue()) { NoSignedWrap = true; - else if (!isSigned && !InitialVal->getValue().isMaxValue()) + // If the original induction variable starts at zero or greater, + // the trip count can be considered signed. + if (InitialVal->getValue().isNonNegative()) + SignExtendTripCount = true; + } else if (!isSigned && !InitialVal->getValue().isMaxValue()) NoUnsignedWrap = true; return PN; } @@ -678,6 +699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // using it. We can currently only handle loops with a single exit. bool NoSignedWrap = false; bool NoUnsignedWrap = false; + bool SignExtendTripCount = false; const PHINode *OrigControllingPHI = 0; if (!isa(IterationCount) && ExitingBlock) // Can't rewrite non-branch yet. @@ -686,14 +708,16 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Determine if the OrigIV will ever undergo overflow. OrigControllingPHI = TestOrigIVForWrap(L, BI, OrigCond, - NoSignedWrap, NoUnsignedWrap); + NoSignedWrap, NoUnsignedWrap, + SignExtendTripCount); // We'll be replacing the original condition, so it'll be dead. DeadInsts.insert(OrigCond); } LinearFunctionTestReplace(L, IterationCount, IndVar, - ExitingBlock, BI, Rewriter); + ExitingBlock, BI, Rewriter, + SignExtendTripCount); } // Now that we have a canonical induction variable, we can rewrite any diff --git a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll index 0a91ec88064..930721a85b4 100644 --- a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll +++ b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll @@ -1,5 +1,6 @@ ; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t -; RUN: grep sext %t | count 1 +; RUN: grep sext %t | count 2 +; RUN: grep { = sext i32 %n to i64} %t ; RUN: grep phi %t | count 1 ; RUN: grep {phi i64} %t diff --git a/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll index 08b08f200ec..9588bd30f2f 100644 --- a/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll +++ b/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll @@ -1,6 +1,7 @@ -; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep sext - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t +; RUN: grep sext %t | count 2 +; RUN: grep { = sext i16 %N to i64} %t +; RUN: grep { = sext i32 %count to i64} %t define i64 @test(i64* nocapture %first, i32 %count) nounwind readonly { entry: diff --git a/test/Transforms/IndVarSimplify/signed-trip-count.ll b/test/Transforms/IndVarSimplify/signed-trip-count.ll new file mode 100644 index 00000000000..cea9f822455 --- /dev/null +++ b/test/Transforms/IndVarSimplify/signed-trip-count.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t +; RUN: grep { = sext i32 %n} %t +; RUN: grep phi %t | count 1 +; RUN: not grep zext %t + +define void @foo(i64* nocapture %x, i32 %n) nounwind { +entry: + %tmp102 = icmp sgt i32 %n, 0 ; [#uses=1] + br i1 %tmp102, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb7, %bb.nph + %i.01 = phi i32 [ %tmp6, %bb7 ], [ 0, %bb.nph ] ; [#uses=3] + %tmp1 = sext i32 %i.01 to i64 ; [#uses=1] + %tmp4 = getelementptr i64* %x, i32 %i.01 ; [#uses=1] + store i64 %tmp1, i64* %tmp4, align 8 + %tmp6 = add i32 %i.01, 1 ; [#uses=2] + br label %bb7 + +bb7: ; preds = %bb + %tmp10 = icmp slt i32 %tmp6, %n ; [#uses=1] + br i1 %tmp10, label %bb, label %bb7.return_crit_edge + +bb7.return_crit_edge: ; preds = %bb7 + br label %return + +return: ; preds = %bb7.return_crit_edge, %entry + ret void +} -- 2.34.1