case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
- BaseOpc = ISD::MUL; CondOpc = X86::SETOr; break;
+ BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
case Intrinsic::umul_with_overflow:
BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
}
RHSIsKill);
}
- // FastISel doesn't have a pattern for X86::MUL*r. Emit it manually.
+ // FastISel doesn't have patterns for all of the X86::MUL*r and X86::IMUL*r
+ // instructions, so emit them manually.
if (BaseOpc == X86ISD::UMUL && !ResultReg) {
static const unsigned MULOpc[] =
- { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
+ { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
// First copy the first operand into RAX, which is an implicit input to
// the X86::MUL*r instruction.
.addReg(LHSReg, getKillRegState(LHSIsKill));
ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
+ } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
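+ // X86::IMUL8r is the one-operand form (it multiplies AL by r/m8 into AX);
+ // the 16/32/64-bit entries are the two-operand IMUL*rr register forms.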
+ if (VT == MVT::i8) {
+ // Copy the first operand into AL, which is an implicit input to the
+ // X86::IMUL8r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), X86::AL)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ RHSIsKill);
+ } else
+ ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
}
if (!ResultReg)
}
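
For context: these intrinsics are what Clang emits for its checked-arithmetic
builtins, so the new FastISel path is exercised by ordinary -O0 builds. A
minimal C sketch of a caller (a hypothetical example, not part of the patch):

    #include <stdbool.h>

    /* Clang lowers __builtin_smul_overflow to @llvm.smul.with.overflow.i32,
     * which the FastISel change above now selects directly. The builtin
     * returns true on signed overflow and stores the wrapped product. */
    bool checked_smul(int a, int b, int *prod) {
        return __builtin_smul_overflow(a, b, prod);
    }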
; SMULO
+define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL: smulo.i8
+; FAST: movb %dil, %al
+; FAST-NEXT: imulb %sil
+; FAST-NEXT: seto %cl
+ %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL: smulo.i16
+; DAG: imulw %si, %di
+; DAG-NEXT: seto %al
+; FAST-LABEL: smulo.i16
+; FAST: imulw %si, %di
+; FAST-NEXT: seto %al
+ %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; DAG-LABEL: smulo.i32
}
; UMULO
+define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL: umulo.i8
+; FAST: movb %dil, %al
+; FAST-NEXT: mulb %sil
+; FAST-NEXT: seto %cl
+ %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL: umulo.i16
+; DAG: mulw %si
+; DAG-NEXT: seto
+; FAST-LABEL: umulo.i16
+; FAST: mulw %si
+; FAST-NEXT: seto
+ %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; DAG-LABEL: umulo.i32
ret i1 true
}
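
The unsigned tests correspond to __builtin_umul_overflow in the same way; a
minimal sketch of a caller (hypothetical, not part of the patch):

    #include <stdbool.h>

    /* Lowered to @llvm.umul.with.overflow.i32, covered by the umulo.* tests
     * above. The hardware MUL sets OF/CF when the upper half of the product
     * is non-zero, which SETO then materializes as the i1 result. */
    bool checked_umul(unsigned a, unsigned b, unsigned *prod) {
        return __builtin_umul_overflow(a, b, prod);
    }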
-declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
+declare {i8, i1} @llvm.sadd.with.overflow.i8 (i8, i8 ) nounwind readnone
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
+declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
+declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone