// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ llvm::Value *IncrementedIndvar =
+ IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ const auto *IncrementedIndvarSCEV =
+ cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
+ // It is unsafe to use the incremented indvar if it has a wrapping flag, we
+ // don't want to compare against a poison value. Check the SCEV that
+ // corresponds to the incremented indvar, the SCEVExpander will only insert
+ // flags in the IR if the SCEV originally had wrapping flags.
+ // FIXME: In theory, SCEV could drop flags even though they exist in IR.
+ // A more robust solution would involve getting a new expression for
+ // CmpIndVar by applying non-NSW/NUW AddExprs.
+ auto WrappingFlags =
+ ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW);
+ const SCEV *IVInit = IncrementedIndvarSCEV->getStart();
+ if (SE->getTypeSizeInBits(IVInit->getType()) >
+ SE->getTypeSizeInBits(IVCount->getType()))
+ IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1);
+ // Check if InitIV + BECount+1 requires sign/zero extension.
+ // If not, clear the corresponding flag from WrappingFlags because it is not
+ // necessary for those flags in the IncrementedIndvarSCEV expression.
+ if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
+ WideTy) ==
+ SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy),
+ SE->getSignExtendExpr(BackedgeTakenCount, WideTy)))
+ WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW);
+ if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
+ WideTy) ==
+ SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy),
+ SE->getZeroExtendExpr(BackedgeTakenCount, WideTy)))
+ WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW);
+ if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
+ WrappingFlags)) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ CmpIndVar = IncrementedIndvar;
+ }
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8
; CHECK: for.body:
; CHECK: phi i8 addrspace(2)*
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16
; CHECK: for.body:
; CHECK: phi i8 addrspace(3)*
; Perform LFTR without generating extra preheader code.
define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
i32 %irow, i32 %ilead) nounwind {
-; CHECK: entry:
-; CHECK-NOT: zext
-; CHECK-NOT: add
-; CHECK: loop:
-; CHECK: phi i64
-; CHECK: phi i64
+; CHECK-LABEL: @guardedloop(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i32 1, %irow
+; CHECK-NEXT: br i1 %[[cmp]], label %[[loop_preheader:.*]], label %[[return:.*]]
+
+; CHECK: [[loop_preheader]]:
+; CHECK-NEXT: %[[sext:.*]] = sext i32 %ilead to i64
+; CHECK-NEXT: %[[add:.*]] = add i32 %irow, -1
+; CHECK-NEXT: br label %[[loop:.*]]
+
+; CHECK: [[loop]]:
+; CHECK-NEXT: %[[indvars_iv2:.*]] = phi i64
+; CHECK-NEXT: phi i64
; CHECK-NOT: phi
-; CHECK: icmp ne
-; CHECK: br i1
+; CHECK: %[[lftr_wideiv:.*]] = trunc i64 %[[indvars_iv2]] to i32
+; CHECK-NEXT: %[[exitcond:.*]] = icmp ne i32 %[[lftr_wideiv]], %[[add]]
+; CHECK-NEXT: br i1 %[[exitcond]], label %[[loop]], label
entry:
%cmp = icmp slt i32 1, %irow
br i1 %cmp, label %loop, label %return