Use new EVT::vAny type to combine Neon intrinsics for VPADD.

author Bob Wilson <bob.wilson@apple.com>

Tue, 11 Aug 2009 01:15:26 +0000 (01:15 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Tue, 11 Aug 2009 01:15:26 +0000 (01:15 +0000)
author Bob Wilson <bob.wilson@apple.com>
Tue, 11 Aug 2009 01:15:26 +0000 (01:15 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Tue, 11 Aug 2009 01:15:26 +0000 (01:15 +0000)
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td

index 2a31c504e12941a1189d850c0c9b04b218891e26..d86dd087107ccf2c6c46092e2e9bc1c4c977ce65 100644 (file)
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -42,6 +42,9 @@ let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
    class Neon_2Arg_Float_Intrinsic
      : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                  [IntrNoMem]>;
+  class Neon_2Arg_Vector_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+                [IntrNoMem]>;
    class Neon_2Arg_Narrow_Intrinsic
      : Intrinsic<[llvm_anyint_ty],
                  [LLVMExtendedElementVectorType<0>,
@@ -194,8 +197,7 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
  def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
  
  // Vector Pairwise Add.
-def int_arm_neon_vpaddi : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpaddf : Neon_2Arg_Float_Intrinsic;
+def int_arm_neon_vpadd : Neon_2Arg_Vector_Intrinsic;
  
  // Vector Pairwise Add Long.
  // Note: This is different than the other "long" NEON intrinsics because
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 938ddcf606ccbb84574d4b08c1f68b4608dec822..1ed3a619a0f5c7b2fa7b99a5fda3572c3366ed0b 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1261,13 +1261,13 @@ def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
  
  //   VPADD    : Vector Pairwise Add
  def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
-                        int_arm_neon_vpaddi, 0>;
+                        int_arm_neon_vpadd, 0>;
  def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
-                        int_arm_neon_vpaddi, 0>;
+                        int_arm_neon_vpadd, 0>;
  def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
-                        int_arm_neon_vpaddi, 0>;
+                        int_arm_neon_vpadd, 0>;
  def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
-                        int_arm_neon_vpaddf, 0>;
+                        int_arm_neon_vpadd, 0>;
  
  //   VPADDL   : Vector Pairwise Add Long
  defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll

index baff49227e64ef2f3169b8173592dd056f7e1d8e..b551fc0f6ab6dee74efff7010ed2df2aee629564 100644 (file)
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -7,33 +7,33 @@
  define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-       %tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+       %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
  }
  
  define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-       %tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+       %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
  }
  
  define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-       %tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+       %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
  }
  
  define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
  }
  
-declare <8 x i8>  @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
author	Bob Wilson <bob.wilson@apple.com>
	Tue, 11 Aug 2009 01:15:26 +0000 (01:15 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Tue, 11 Aug 2009 01:15:26 +0000 (01:15 +0000)
include/llvm/IntrinsicsARM.td		patch | blob | history
lib/Target/ARM/ARMInstrNEON.td		patch | blob | history
test/CodeGen/ARM/vpadd.ll		patch | blob | history