From 1154be8198b280e74dbb9ae868a9428295a4f490 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Mon, 7 Jul 2014 21:52:21 +0000
Subject: [PATCH] [FastISel][X86] Fix smul.with.overflow.i8 lowering.

Add custom lowering code for signed multiply instruction selection, because
the default FastISel instruction selection for ISD::MUL will use unsigned
multiply for the i8 type and signed multiply for all other types. This would
set the incorrect flags for the overflow check.

This fixes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212493 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 22 ++++++++++--
 test/CodeGen/X86/xaluo.ll      | 62 +++++++++++++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index d9f8967dbf0..ce554ba21d6 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2402,7 +2402,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   case Intrinsic::usub_with_overflow:
     BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
   case Intrinsic::smul_with_overflow:
-    BaseOpc = ISD::MUL; CondOpc = X86::SETOr; break;
+    BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
   case Intrinsic::umul_with_overflow:
     BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
   }
@@ -2430,10 +2430,11 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
                             RHSIsKill);
   }
 
-  // FastISel doesn't have a pattern for X86::MUL*r. Emit it manually.
+  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
+  // it manually.
   if (BaseOpc == X86ISD::UMUL && !ResultReg) {
     static const unsigned MULOpc[] =
-       { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
+      { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
     static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
     // First copy the first operand into RAX, which is an implicit input to
     // the X86::MUL*r instruction.
@@ -2442,6 +2443,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
       .addReg(LHSReg, getKillRegState(LHSIsKill));
     ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
                                TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
+  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
+    static const unsigned MULOpc[] =
+      { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
+    if (VT == MVT::i8) {
+      // Copy the first operand into AL, which is an implicit input to the
+      // X86::IMUL8r instruction.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), X86::AL)
+        .addReg(LHSReg, getKillRegState(LHSIsKill));
+      ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+                                 RHSIsKill);
+    } else
+      ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+                                  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
+                                  RHSReg, RHSIsKill);
   }
 
   if (!ResultReg)
diff --git a/test/CodeGen/X86/xaluo.ll b/test/CodeGen/X86/xaluo.ll
index c236f809368..f078631c2b3 100644
--- a/test/CodeGen/X86/xaluo.ll
+++ b/test/CodeGen/X86/xaluo.ll
@@ -261,6 +261,34 @@ entry:
 }
 
 ; SMULO
+define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL: smulo.i8
+; FAST: movb %dil, %al
+; FAST-NEXT: imulb %sil
+; FAST-NEXT: seto %cl
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL: smulo.i16
+; DAG: imulw %si, %di
+; DAG-NEXT: seto %al
+; FAST-LABEL: smulo.i16
+; FAST: imulw %si, %di
+; FAST-NEXT: seto %al
+  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
 define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; DAG-LABEL: smulo.i32
@@ -292,6 +320,34 @@ entry:
 }
 
 ; UMULO
+define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL: umulo.i8
+; FAST: movb %dil, %al
+; FAST-NEXT: mulb %sil
+; FAST-NEXT: seto %cl
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL: umulo.i16
+; DAG: mulw %si
+; DAG-NEXT: seto
+; FAST-LABEL: umulo.i16
+; FAST: mulw %si
+; FAST-NEXT: seto
+  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
 define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; DAG-LABEL: umulo.i32
@@ -665,7 +721,7 @@ continue:
   ret i1 true
 }
 
-declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
+declare {i8, i1} @llvm.sadd.with.overflow.i8 (i8, i8 ) nounwind readnone
 declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
@@ -675,8 +731,12 @@
 declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
 declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
+declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
+declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
-- 
2.34.1
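
Aside (illustration, not part of the patch): the fix matters because the x86
flags after an 8-bit multiply answer different questions for MUL8r (unsigned)
and IMUL8r (signed). Below is a minimal standalone C++ sketch of the two
overflow semantics, using the GCC/Clang builtin __builtin_mul_overflow; the
file name and sample values are illustrative assumptions, not taken from the
patch:

  // smul_overflow_demo.cpp
  // Why smul.with.overflow.i8 must not be lowered to an unsigned multiply:
  // the same 8-bit pattern overflows as unsigned but not as signed.
  #include <cstdint>
  #include <cstdio>

  int main() {
    int8_t  sa = -1,   sb = -1,   sres;
    uint8_t ua = 0xFF, ub = 0xFF, ures;  // same bit patterns as sa, sb

    // Signed: (-1) * (-1) = 1 fits in [-128, 127], so no signed overflow.
    // This is what IMUL8r's OF flag reports.
    bool sovf = __builtin_mul_overflow(sa, sb, &sres);

    // Unsigned: 255 * 255 = 65025 does not fit in [0, 255], so the unsigned
    // multiply overflows. MUL8r's OF/CF report this, which is the wrong
    // answer for a signed-overflow check.
    bool uovf = __builtin_mul_overflow(ua, ub, &ures);

    std::printf("signed:   result=%d overflow=%d\n", sres, sovf);  // 1 0
    std::printf("unsigned: result=%d overflow=%d\n", ures, uovf);  // 1 1
    return 0;
  }

With the patch applied, FastISel selects IMUL8r for the signed intrinsic, so
the seto in the smulo.i8 test above reads the signed-overflow flag rather than
the unsigned one.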