Type *WideType;
bool IsSigned;
+ // True if the narrow induction variable is never negative. Tracking this
+ // information lets us use a sign extension instead of a zero extension or
+ // vice versa, when profitable and legal.
+ bool NeverNegative;
+
// Context
LoopInfo *LI;
Loop *L;
OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
IsSigned(WI.IsSigned),
+ NeverNegative(false),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
if (!Cmp)
return false;
- // Sign of IV user and compare must match.
- if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+ // - The narrow IV is never negative (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+ // and %narrow is never negative. Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+
+ if (!(NeverNegative || IsSigned == Cmp->isSigned()))
return false;
Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
// Widen the other operand of the compare, if necessary.
if (CastWidth < IVWidth) {
- Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
+ Value *ExtOp = getExtend(Op, WideType, Cmp->isSigned(), Cmp);
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
}
return true;
if (!AddRec)
return nullptr;
+ NeverNegative = SE->isKnownPredicate(ICmpInst::ICMP_SGE, AddRec,
+ SE->getConstant(AddRec->getType(), 0));
+
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
SE->getSignExtendExpr(AddRec, WideType) :
for.end:
ret i32 %sum.0
}
+
+; test6: the induction variable %i.0 is only zero-extended by its user
+; (%idxprom), while the loop-exit compare uses a signed predicate (sle).
+; The expectations below require the compare to be performed on the i64
+; induction variable anyway, with %b sign-extended to match the signedness
+; of the compare rather than that of the zext.
+; NOTE(review): presumably this is legal because %i.0 starts at 0 and is
+; incremented with nsw, so it can be shown to be non-negative -- confirm
+; against the pass under test.
+define i32 @test6(i32* %a, i32 %b) {
+; CHECK-LABEL: @test6(
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; CHECK: icmp sle i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp sle i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret i32 %sum.0
+}
+
+; test7: the induction variable %i.0 is sign-extended by its user (%idxprom)
+; and is compared against %b twice: with an unsigned predicate (ule) in
+; for.cond and with a signed predicate (sle) in for.body.
+; The expectations below require both compares to be performed on the i64
+; induction variable, each with %b extended according to its own predicate:
+; zero-extended for the ule compare and sign-extended for the sle compare.
+define i32 @test7(i32* %a, i32 %b) {
+; CHECK-LABEL: @test7(
+; CHECK: [[B_ZEXT:%[a-z0-9]+]] = zext i32 %b to i64
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; CHECK: icmp ule i64 %indvars.iv, [[B_ZEXT]]
+; CHECK: for.body:
+; CHECK: icmp sle i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ule i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp sle i32 %i.0, %b
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+}
+
+; test8: the induction variable %i.0 starts at %init rather than a constant.
+; The loop is entered only when %init is (signed) greater than 0, and the
+; back edge is taken only when 0 is (signed) less than %inc.
+; %i.0 is sign-extended by its user (%idxprom) but compared with an unsigned
+; predicate (ule). The expectations below require the wide induction
+; variable to start from the sign extension of %init and the ule compare to
+; be performed on it against the zero extension of %b.
+define i32 @test8(i32* %a, i32 %b, i32 %init) {
+; CHECK-LABEL: @test8(
+; CHECK: [[INIT_SEXT:%[a-z0-9]+]] = sext i32 %init to i64
+; CHECK: [[B_ZEXT:%[a-z0-9]+]] = zext i32 %b to i64
+; CHECK: for.cond:
+; Note: %indvars.iv is the sign extension of %i.0
+; CHECK: %indvars.iv = phi i64 [ [[INIT_SEXT]], %for.cond.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK: icmp ule i64 %indvars.iv, [[B_ZEXT]]
+
+entry:
+ %e = icmp sgt i32 %init, 0
+ br i1 %e, label %for.cond, label %leave
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ %init, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ule i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp slt i32 0, %inc
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+
+leave:
+ ret i32 0
+}
+
+; test9: same loop shape as a guarded, non-constant-start induction variable:
+; the loop is entered only when %init is (signed) greater than 0, and the
+; back edge is taken only when 0 is (signed) less than %inc.
+; Here %i.0 is zero-extended by its user (%idxprom) but compared with a
+; signed predicate (slt). The expectations below require the wide induction
+; variable to start from the zero extension of %init and the slt compare to
+; be performed on it against the sign extension of %b.
+define i32 @test9(i32* %a, i32 %b, i32 %init) {
+; CHECK-LABEL: @test9(
+; CHECK: [[INIT_ZEXT:%[a-z0-9]+]] = zext i32 %init to i64
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; Note: %indvars.iv is the zero extension of %i.0
+; CHECK: %indvars.iv = phi i64 [ [[INIT_ZEXT]], %for.cond.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK: icmp slt i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ %e = icmp sgt i32 %init, 0
+ br i1 %e, label %for.cond, label %leave
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ %init, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp slt i32 0, %inc
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+
+leave:
+ ret i32 0
+}