case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
- BaseOpc = ISD::MUL; CondOpc = X86::SETOr; break;
+ BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
case Intrinsic::umul_with_overflow:
BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
}
RHSIsKill);
}
- // FastISel doesn't have a pattern for X86::MUL*r. Emit it manually.
+ // FastISel doesn't have patterns for all of the X86::MUL*r and X86::IMUL*r
+ // instructions, so emit them manually.
if (BaseOpc == X86ISD::UMUL && !ResultReg) {
static const unsigned MULOpc[] =
- { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
+ { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
// First copy the first operand into RAX, which is an implicit input to
// the X86::MUL*r instruction.
.addReg(LHSReg, getKillRegState(LHSIsKill));
ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
+ } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
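+ // X86::IMUL8r is the one-operand form (it multiplies AL by r/m8 into AX);
+ // the 16/32/64-bit entries are the two-operand IMUL*rr register forms.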
+ if (VT == MVT::i8) {
+ // Copy the first operand into AL, which is an implicit input to the
+ // X86::IMUL8r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), X86::AL)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ RHSIsKill);
+ } else
+ ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
}
if (!ResultReg)
}
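
For context: these intrinsics are what Clang emits for its checked-arithmetic
builtins, so the new FastISel path is exercised by ordinary -O0 builds. A
minimal C sketch of a caller (a hypothetical example, not part of the patch):

    #include <stdbool.h>

    /* Clang lowers __builtin_smul_overflow to @llvm.smul.with.overflow.i32,
     * which the FastISel change above now selects directly. The builtin
     * returns true on signed overflow and stores the wrapped product. */
    bool checked_smul(int a, int b, int *prod) {
        return __builtin_smul_overflow(a, b, prod);
    }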
; SMULO
+define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL: smulo.i8
+; FAST: movb %dil, %al
+; FAST-NEXT: imulb %sil
+; FAST-NEXT: seto %cl
+ %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL: smulo.i16
+; DAG: imulw %si, %di
+; DAG-NEXT: seto %al
+; FAST-LABEL: smulo.i16
+; FAST: imulw %si, %di
+; FAST-NEXT: seto %al
+ %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; DAG-LABEL: smulo.i32
}
; UMULO
+define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL: umulo.i8
+; FAST: movb %dil, %al
+; FAST-NEXT: mulb %sil
+; FAST-NEXT: seto %cl
+ %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL: umulo.i16
+; DAG: mulw %si
+; DAG-NEXT: seto
+; FAST-LABEL: umulo.i16
+; FAST: mulw %si
+; FAST-NEXT: seto
+ %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; DAG-LABEL: umulo.i32
ret i1 true
}
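
The unsigned tests correspond to __builtin_umul_overflow in the same way; a
minimal sketch of a caller (hypothetical, not part of the patch):

    #include <stdbool.h>

    /* Lowered to @llvm.umul.with.overflow.i32, covered by the umulo.* tests
     * above. The hardware MUL sets OF/CF when the upper half of the product
     * is non-zero, which SETO then materializes as the i1 result. */
    bool checked_umul(unsigned a, unsigned b, unsigned *prod) {
        return __builtin_umul_overflow(a, b, prod);
    }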
-declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
+declare {i8, i1} @llvm.sadd.with.overflow.i8 (i8, i8 ) nounwind readnone
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
+declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
+declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone