Remove the pmulld intrinsic and auto-upgrade it to a vector multiply.

author Eric Christopher <echristo@apple.com>

Tue, 30 Mar 2010 18:49:01 +0000 (18:49 +0000)

committer Eric Christopher <echristo@apple.com>

Tue, 30 Mar 2010 18:49:01 +0000 (18:49 +0000)
author Eric Christopher <echristo@apple.com>
Tue, 30 Mar 2010 18:49:01 +0000 (18:49 +0000)
committer Eric Christopher <echristo@apple.com>
Tue, 30 Mar 2010 18:49:01 +0000 (18:49 +0000)
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td

index d6e1db429922789efddf09e11557281489550a9f..6be6eb16dfee0d3c9e7bbb6e9017f9e6a8457bbb 100644 (file)
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -810,9 +810,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
    def int_x86_sse41_pmuldq          : GCCBuiltin<"__builtin_ia32_pmuldq128">,
                Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                          [IntrNoMem, Commutative]>;
-  def int_x86_sse41_pmulld          : GCCBuiltin<"__builtin_ia32_pmulld128">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                        [IntrNoMem, Commutative]>;
  }
  
  // Vector extract
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 5def78737f88244670e845ab9a13a032805cda89..614a21182bcda6e6cbbcd09a76b203519584cfe8 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::PMULHUWrr,       X86::PMULHUWrm, 16 },
      { X86::PMULHWrr,        X86::PMULHWrm, 16 },
      { X86::PMULLDrr,        X86::PMULLDrm, 16 },
-    { X86::PMULLDrr_int,    X86::PMULLDrm_int, 16 },
      { X86::PMULLWrr,        X86::PMULLWrm, 16 },
      { X86::PMULUDQrr,       X86::PMULUDQrm, 16 },
      { X86::PORrr,           X86::PORrm, 16 },
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 077d92de4f8b6bf5274db27db0046960fe28266f..e207598144140100c177a3628bb768ac5c976a65 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3448,8 +3448,28 @@ let Constraints = "$src1 = $dst" in {
                         OpSize;
    }
  }
-defm PMULLD       : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
-                                       int_x86_sse41_pmulld, 1>;
+
+/// SS48I_binop_rm - Two-address SSE4.1 binary operator with reg (rr) and mem (rm) forms.
+let Constraints = "$src1 = $dst" in {
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                        ValueType OpVT, bit Commutable = 0> {
+  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 
+                                 (ins VR128:$src1, VR128:$src2),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+               [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+               OpSize {
+    let isCommutable = Commutable;
+  }
+  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 
+                                 (ins VR128:$src1, i128mem:$src2),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+               [(set VR128:$dst, (OpNode VR128:$src1,
+                                  (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+               OpSize;
+}
+}
+
+defm PMULLD         : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
  
  /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
  let Constraints = "$src1 = $dst" in {
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp

index 5e4c9fb76624ddadfc1cc8eae178c29d0c2da476..b9aa5c34675b9878cc5748e7f60bed60f9b55234 100644 (file)
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -225,7 +225,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
        // Calls to these intrinsics are transformed into ShuffleVector's.
        NewFn = 0;
        return true;
+    } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
+      // Calls to these intrinsics are transformed into vector multiplies.
+      NewFn = 0;
+      return true;
      }
+    
  
      break;
    }
@@ -355,6 +360,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
        
        //  Clean up the old call now that it has been completely upgraded.
        CI->eraseFromParent();
+    } else if (F->getName() == "llvm.x86.sse41.pmulld") {
+      // Upgrade this set of intrinsics into vector multiplies.
+      Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
+                                                   CI->getOperand(2),
+                                                   CI->getName(),
+                                                   CI);
+      // Fix up all the uses with our new multiply.
+      if (!CI->use_empty())
+        CI->replaceAllUsesWith(Mul);
+        
+      // Remove the old intrinsic call now that its uses have been replaced.
+      CI->eraseFromParent();
      } else {
        llvm_unreachable("Unknown function for CallInst upgrade.");
      }
diff --git a/test/Bitcode/sse41_pmulld.ll b/test/Bitcode/sse41_pmulld.ll

new file mode 100644 (file)

index 0000000..caf8547
--- /dev/null
+++ b/test/Bitcode/sse41_pmulld.ll
@@ -0,0 +1,2 @@
+; RUN: llvm-dis < %s.bc | not grep {llvm\\.x86\\.sse41\\.pmulld}
+; RUN: llvm-dis < %s.bc | grep mul
+\ No newline at end of file
diff --git a/test/Bitcode/sse41_pmulld.ll.bc b/test/Bitcode/sse41_pmulld.ll.bc

new file mode 100644 (file)

index 0000000..bd66f0a

Binary files /dev/null and b/test/Bitcode/sse41_pmulld.ll.bc differ
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll

index e2746a8c0638e213e977ca996f2b51bc8e58e7c6..bf5229aa1ee8639443d7fade7dd700e54917e697 100644 (file)
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,6 +1,6 @@
  ; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
  ; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 12
+; RUN: grep mov %t | count 11
  
  define <4 x i32> @a(<4 x i32> %i) nounwind  {
          %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll

new file mode 100644 (file)

index 0000000..3ef5941
--- /dev/null
+++ b/test/CodeGen/X86/pmulld.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK-NEXT: pmulld
+  %C = mul <4 x i32> %A, %B
+  ret <4 x i32> %C
+}
+
+define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
+; CHECK: test1a:
+; CHECK-NEXT: pmulld
+  %B = load <4 x i32>* %Bp
+  %C = mul <4 x i32> %A, %B
+  ret <4 x i32> %C
+}
author	Eric Christopher <echristo@apple.com>
	Tue, 30 Mar 2010 18:49:01 +0000 (18:49 +0000)
committer	Eric Christopher <echristo@apple.com>
	Tue, 30 Mar 2010 18:49:01 +0000 (18:49 +0000)
include/llvm/IntrinsicsX86.td		patch \| blob \| history
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
lib/VMCore/AutoUpgrade.cpp		patch \| blob \| history
test/Bitcode/sse41_pmulld.ll	[new file with mode: 0644]	patch \| blob
test/Bitcode/sse41_pmulld.ll.bc	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/pmul.ll		patch \| blob \| history
test/CodeGen/X86/pmulld.ll	[new file with mode: 0644]	patch \| blob