Add NEON single-precision FP support for fabs and fneg.

author David Goodwin <david_goodwin@apple.com>

Tue, 4 Aug 2009 20:39:05 +0000 (20:39 +0000)

committer David Goodwin <david_goodwin@apple.com>

Tue, 4 Aug 2009 20:39:05 +0000 (20:39 +0000)
author David Goodwin <david_goodwin@apple.com>
Tue, 4 Aug 2009 20:39:05 +0000 (20:39 +0000)
committer David Goodwin <david_goodwin@apple.com>
Tue, 4 Aug 2009 20:39:05 +0000 (20:39 +0000)
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td

index de2bb78bb419ad194de47570bce32ebd9d1a6981..ce39a3f737676af471de3a2191c9b99c8edb975f 100644 (file)
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1071,6 +1071,14 @@ class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
    let Inst{7-4}   = opcod3;
  }
  
+// Single precision, unary; selected only when NEON is not used for FP.
+// Same as ASuI, but predicated on HasVFP2 and DontUseNEONForFP.
+class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+           string opc, string asm, list<dag> pattern>
+  : ASuI<opcod1, opcod2, opcod3, oops, iops, opc, asm, pattern> {
+  list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
  // Single precision, binary
  class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
             string asm, list<dag> pattern>
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 147490c78434633ed6a97c52f8698a2109c1ea3b..f36e3269b9b7f324797e87a4bdfc7b2037c125dc 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -246,6 +246,12 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
          (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
          [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
  
+// Scalar f32 op via a v2f32 Inst: insert into ssubreg_0, run Inst, extract it
+class N2VDInts<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a)),
+         (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
+          arm_ssubreg_0)>;
+
  // Narrow 2-register intrinsics.
  class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -1338,6 +1344,7 @@ def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
                          v2f32, v2f32, int_arm_neon_vabsf>;
  def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
                          v4f32, v4f32, int_arm_neon_vabsf>;
+def : N2VDInts<fabs, VABSfd>;
  
  //   VQABS    : Vector Saturating Absolute Value
  defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
@@ -1372,6 +1379,7 @@ def  VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
  def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                      (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "",
                      [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
+def : N2VDInts<fneg, VNEGf32d>;
  
  def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
  def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index 923606d9772ffbff5e0820265ada7793d328cb3a..20aff3704dad0b7b1056a198534296d716bbe87e 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -168,9 +168,9 @@ def FABSD  : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
                   "fabsd", " $dst, $a",
                   [(set DPR:$dst, (fabs DPR:$a))]>;
  
-def FABSS  : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
-                 "fabss", " $dst, $a",
-                 [(set SPR:$dst, (fabs SPR:$a))]>;
+def FABSS  : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+                  "fabss", " $dst, $a",
+                  [(set SPR:$dst, (fabs SPR:$a))]>;
  
  let Defs = [FPSCR] in {
  def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
@@ -208,9 +208,9 @@ def FNEGD  : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
                   "fnegd", " $dst, $a",
                   [(set DPR:$dst, (fneg DPR:$a))]>;
  
-def FNEGS  : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
-                 "fnegs", " $dst, $a",
-                 [(set SPR:$dst, (fneg SPR:$a))]>;
+def FNEGS  : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+                  "fnegs", " $dst, $a",
+                  [(set SPR:$dst, (fneg SPR:$a))]>;
  
  def FSQRTD  : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
                   "fsqrtd", " $dst, $a",
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll

new file mode 100644 (file)

index 0000000..4b5bd13
--- /dev/null
+++ b/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+        %dum = fadd float %a, %b
+       %0 = tail call float @fabsf(float %dum)
+        %dum1 = fadd float %0, %b
+       ret float %dum1
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll

new file mode 100644 (file)

index 0000000..ff171e1
--- /dev/null
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+
+define float @test1(float* %a) {
+entry:
+       %0 = load float* %a, align 4            ; <float> [#uses=2]
+       %1 = fsub float -0.000000e+00, %0               ; <float> [#uses=2]
+       %2 = fpext float %1 to double           ; <double> [#uses=1]
+       %3 = fcmp olt double %2, 1.234000e+00           ; <i1> [#uses=1]
+       %retval = select i1 %3, float %1, float %0              ; <float> [#uses=1]
+       ret float %retval
+}
+
+define float @test2(float* %a) {
+entry:
+       %0 = load float* %a, align 4            ; <float> [#uses=2]
+       %1 = fmul float -1.000000e+00, %0               ; <float> [#uses=2]
+       %2 = fpext float %1 to double           ; <double> [#uses=1]
+       %3 = fcmp olt double %2, 1.234000e+00           ; <i1> [#uses=1]
+       %retval = select i1 %3, float %1, float %0              ; <float> [#uses=1]
+       ret float %retval
+}
author	David Goodwin <david_goodwin@apple.com>
	Tue, 4 Aug 2009 20:39:05 +0000 (20:39 +0000)
committer	David Goodwin <david_goodwin@apple.com>
	Tue, 4 Aug 2009 20:39:05 +0000 (20:39 +0000)
lib/Target/ARM/ARMInstrFormats.td		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
test/CodeGen/ARM/fabss.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fnegs.ll	[new file with mode: 0644]	patch \| blob