DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
- // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
- // about the narrow operand yet so must insert a [sz]ext. It is probably loop
- // invariant and will be folded or hoisted. If it actually comes from a
- // widened IV, it should be removed during a future call to widenIVUse.
- Value *LHS =
- (NarrowUse->getOperand(0) == NarrowDef)
- ? WideDef
- : getExtend(NarrowUse->getOperand(0), WideType, IsSigned, NarrowUse);
- Value *RHS =
- (NarrowUse->getOperand(1) == NarrowDef)
- ? WideDef
- : getExtend(NarrowUse->getOperand(1), WideType, IsSigned, NarrowUse);
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse = nullptr;
+
+ switch (NarrowUse->getOpcode()) {
+ default:
+ llvm_unreachable("No other possibility!");
+
+ case Instruction::Add:
+ WideUse = SE->getAddExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Mul:
+ WideUse = SE->getMulExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::UDiv:
+ WideUse = SE->getUDivExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Sub:
+ WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
+ break;
+ }
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = IsSigned;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
+ }
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : getExtend(NarrowUse->getOperand(0), WideType, SignExtend,
+ NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : getExtend(NarrowUse->getOperand(1), WideType, SignExtend,
+ NarrowUse);
auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
NarrowBO->getName());
+
IRBuilder<> Builder(NarrowUse);
Builder.Insert(WideBO);
if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
target triple = "x86_64-apple-darwin"
-; CHECK-LABEL: @sloop
+; CHECK-LABEL: @loop_0
; CHECK-LABEL: B18:
; Only one phi now.
; CHECK: phi
; One trunc for the dummy() call.
; CHECK-LABEL: exit24:
; CHECK: trunc i64 {{.*}}lcssa.wide to i32
-define void @sloop(i32* %a) {
+define void @loop_0(i32* %a) {
Prologue:
br i1 undef, label %B18, label %B6
unreachable
}
+define void @loop_1(i32 %lim) {
+; CHECK-LABEL: @loop_1(
+ entry:
+ %entry.cond = icmp ne i32 %lim, 0
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+; CHECK: %indvars.iv = phi i64 [ 1, %loop.preheader ], [ %indvars.iv.next, %loop ]
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: [[IV_INC:%[^ ]+]] = add nsw i64 %indvars.iv, -1
+; CHECK: call void @dummy.i64(i64 [[IV_INC]])
+
+ %iv = phi i32 [ 1, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ %iv.inc.sub = add i32 %iv, -1
+ %iv.inc.sub.zext = zext i32 %iv.inc.sub to i64
+ call void @dummy.i64(i64 %iv.inc.sub.zext)
+ %be.cond = icmp ult i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
declare void @dummy(i32)
+declare void @dummy.i64(i64)