Initial support for single-precision FP using NEON. Added "neonfp" attribute to enable it.

author David Goodwin <david_goodwin@apple.com>

Tue, 4 Aug 2009 17:53:06 +0000 (17:53 +0000)

committer David Goodwin <david_goodwin@apple.com>

Tue, 4 Aug 2009 17:53:06 +0000 (17:53 +0000)
author David Goodwin <david_goodwin@apple.com>
Tue, 4 Aug 2009 17:53:06 +0000 (17:53 +0000)
committer David Goodwin <david_goodwin@apple.com>
Tue, 4 Aug 2009 17:53:06 +0000 (17:53 +0000)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td

index 8c987c268dfed30d2b6d8252b05e35ac369d3e98..172c7de6259e31cdb131c0956052edf00df48ad0 100644 (file)
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,6 +32,9 @@ def ArchV6T2    : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
                                     "ARM v6t2">;
  def ArchV7A     : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
                                     "ARM v7A">;
+def FeatureNEONFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+                                     "true",
+                                     "Use NEON for single-precision FP">;
  def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
                                     "Enable VFP2 instructions">;
  def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td

index fe32c5f669b2126233ac685a5cfda8002ae6f03a..de2bb78bb419ad194de47570bce32ebd9d1a6981 100644 (file)
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1080,6 +1080,14 @@ class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
    let Inst{11-8}  = 0b1010;
  }
  
+// Single precision, binary, if NEON is not used for single-precision FP
+// Same as ASbI except not selected when NEON is used for single-precision FP
+class ASbIn<bits<8> opcod, dag oops, dag iops, string opc,
+            string asm, list<dag> pattern>
+  : ASbI<opcod, oops, iops, opc, asm, pattern> {
+  list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
  // VFP conversion instructions
  class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3,
                 dag oops, dag iops, string opc, string asm, list<dag> pattern>
@@ -1220,3 +1228,9 @@ class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
  class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
              dag oops, dag iops, string opc, string asm, list<dag> pattern>
    : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, opc, asm, pattern>;
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index db3afba57bda5fb08f6dcbe754b9a052a18cbaec..e4a95a74e7941bc925728b92efdbf41fe33880b2 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -104,6 +104,8 @@ def HasV7     : Predicate<"Subtarget->hasV7Ops()">;
  def HasVFP2   : Predicate<"Subtarget->hasVFP2()">;
  def HasVFP3   : Predicate<"Subtarget->hasVFP3()">;
  def HasNEON   : Predicate<"Subtarget->hasNEON()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
  def IsThumb   : Predicate<"Subtarget->isThumb()">;
  def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
  def IsThumb2  : Predicate<"Subtarget->isThumb2()">;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 5e8a4b5994fa7d15dfbe9e8ff9115d1760cb150b..ec4702f5d69c98c5e4c2a2a79ac2b5c015ac0a5c 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -283,6 +283,13 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
    let isCommutable = Commutable;
  }
  
+// Basic 3-register operations, scalar single-precision
+class N3VDs<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+         (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
+                               (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
+          arm_ssubreg_0)>;
+
  // Basic 3-register intrinsics, both double- and quad-register.
  class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                string OpcodeStr, ValueType ResTy, ValueType OpTy,
@@ -319,6 +326,15 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
          [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                               (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
  
+// Multiply-Add/Sub operations, scalar single-precision
+class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$acc, 
+                       (f32 (MulNode SPR:$a, SPR:$b)))),
+         (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
+                               (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
+                               (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
+          arm_ssubreg_0)>;
+
  // Neon 3-argument intrinsics, both double- and quad-register.
  // The destination register is also used as the first source operand register.
  class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -886,6 +902,9 @@ defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
  //   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
  defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
  
+// Vector Add Operations used for single-precision FP
+def : N3VDs<fadd, VADDfd>;
+
  // Vector Multiply Operations.
  
  //   VMUL     : Vector Multiply (integer, polynomial and floating-point)
@@ -908,6 +927,9 @@ def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8,
  //   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
  defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;
  
+// Vector Multiply Operations used for single-precision FP
+def : N3VDs<fmul, VMULfd>;
+
  // Vector Multiply-Accumulate and Multiply-Subtract Operations.
  
  //   VMLA     : Vector Multiply Accumulate (integer and floating-point)
@@ -929,6 +951,9 @@ defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
  //   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
  defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
  
+// Vector Multiply-Accumulate/Subtract used for single-precision FP
+def : N3VDMulOps<fmul, fadd, VMLAfd>;
+
  // Vector Subtract Operations.
  
  //   VSUB     : Vector Subtract (integer and floating-point)
@@ -952,6 +977,9 @@ defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
  //   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
  defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
  
+// Vector Sub Operations used for single-precision FP
+def : N3VDs<fsub, VSUBfd>;
+
  // Vector Comparisons.
  
  //   VCEQ     : Vector Compare Equal
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index a9b4a32f17fc47297fd01cbc26d694c6d8cba071..2ecf5f3a420441e9c194578867ee05cb0392c1b5 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -98,9 +98,9 @@ def FADDD  : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
                   "faddd", " $dst, $a, $b",
                   [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
  
-def FADDS  : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                 "fadds", " $dst, $a, $b",
-                 [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+def FADDS  : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+                  "fadds", " $dst, $a, $b",
+                  [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
  
  // These are encoded as unary instructions.
  let Defs = [FPSCR] in {
@@ -125,9 +125,9 @@ def FMULD  : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
                   "fmuld", " $dst, $a, $b",
                   [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
  
-def FMULS  : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                 "fmuls", " $dst, $a, $b",
-                 [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+def FMULS  : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+                  "fmuls", " $dst, $a, $b",
+                  [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
                   
  def FNMULD  : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
                    "fnmuld", " $dst, $a, $b",
@@ -154,9 +154,9 @@ def FSUBD  : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
    let Inst{6} = 1;
  }
  
-def FSUBS  : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                 "fsubs", " $dst, $a, $b",
-                 [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
+def FSUBS  : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+                  "fsubs", " $dst, $a, $b",
+                  [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
    let Inst{6} = 1;
  }
  
@@ -317,10 +317,10 @@ def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
                  [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
                  RegConstraint<"$dstin = $dst">;
  
-def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                "fmacs", " $dst, $a, $b",
-                [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
+def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+                 "fmacs", " $dst, $a, $b",
+                 [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+                 RegConstraint<"$dstin = $dst">;
  
  def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
                  "fmscd", " $dst, $a, $b",
@@ -339,8 +339,8 @@ def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
    let Inst{6} = 1;
  }
  
-def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                "fnmacs", " $dst, $a, $b",
+def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+                  "fnmacs", " $dst, $a, $b",
               [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
                  RegConstraint<"$dstin = $dst"> {
    let Inst{6} = 1;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp

index 71c77e10682c422fe1f6d14b819a81df057868c0..4e706c5b39c5a3761bc177c57fcb91dd2c2f7f5a 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -25,6 +25,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
                             bool isThumb)
    : ARMArchVersion(V4T)
    , ARMFPUType(None)
+  , UseNEONForSinglePrecisionFP(false)
    , IsThumb(isThumb)
    , ThumbMode(Thumb1)
    , IsR9Reserved(ReserveR9)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h

index 4ec77ff93e6e92d7f11349c243f558d035449721..6d1ffc442069d931a871d05cc87fd3007494f57e 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -42,6 +42,9 @@ protected:
    /// ARMFPUType - Floating Point Unit type.
    ARMFPEnum ARMFPUType;
  
+  /// UseNEONForSinglePrecisionFP - if NEON is available, use it for single-precision FP
+  bool UseNEONForSinglePrecisionFP;
+
    /// IsThumb - True if we are in thumb mode, false if in ARM mode.
    bool IsThumb;
  
@@ -98,7 +101,9 @@ protected:
    bool hasVFP2() const { return ARMFPUType >= VFPv2; }
    bool hasVFP3() const { return ARMFPUType >= VFPv3; }
    bool hasNEON() const { return ARMFPUType >= NEON;  }
-
+  bool useNEONForSinglePrecisionFP() const { 
+    return hasNEON() && UseNEONForSinglePrecisionFP; }
+  
    bool isTargetDarwin() const { return TargetType == isDarwin; }
    bool isTargetELF() const { return TargetType == isELF; }
  
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll

new file mode 100644 (file)

index 0000000..35c74f7
--- /dev/null
+++ b/test/CodeGen/ARM/fadds.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+       %0 = fadd float %a, %b
+       ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll

new file mode 100644 (file)

index 0000000..9637ccb
--- /dev/null
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+       %0 = fdiv float %a, %b
+       ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll

new file mode 100644 (file)

index 0000000..24517e1
--- /dev/null
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+       %0 = fmul float %a, %b
+        %1 = fadd float %acc, %0
+       ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll

new file mode 100644 (file)

index 0000000..5338f44
--- /dev/null
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+       %0 = fmul float %a, %b
+        %1 = fsub float %0, %acc
+       ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll

new file mode 100644 (file)

index 0000000..24c04ab
--- /dev/null
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+       %0 = fmul float %a, %b
+       ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll

new file mode 100644 (file)

index 0000000..537c411
--- /dev/null
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -0,0 +1,12 @@
+; XFAIL: *
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+       %0 = fmul float %a, %b
+        %1 = fsub float %acc, %0
+       ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll

new file mode 100644 (file)

index 0000000..da3b95f
--- /dev/null
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -0,0 +1,13 @@
+; XFAIL: *
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+       %0 = fmul float %a, %b
+       %1 = fsub float 0.0, %0
+        %2 = fsub float %1, %acc
+       ret float %2
+}
+
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll

new file mode 100644 (file)

index 0000000..7130aa6
--- /dev/null
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -0,0 +1,12 @@
+; XFAIL: *
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+       %0 = fmul float %a, %b
+        %1 = fsub float 0.0, %0
+       ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll

new file mode 100644 (file)

index 0000000..e318237
--- /dev/null
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+       %0 = fsub float %a, %b
+       ret float %0
+}
+
author	David Goodwin <david_goodwin@apple.com>
	Tue, 4 Aug 2009 17:53:06 +0000 (17:53 +0000)
committer	David Goodwin <david_goodwin@apple.com>
	Tue, 4 Aug 2009 17:53:06 +0000 (17:53 +0000)
lib/Target/ARM/ARM.td		patch \| blob \| history
lib/Target/ARM/ARMInstrFormats.td		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.cpp		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.h		patch \| blob \| history
test/CodeGen/ARM/fadds.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fdivs.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fmacs.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fmscs.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fmuls.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fnmacs.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fnmscs.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fnmuls.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/fsubs.ll	[new file with mode: 0644]	patch \| blob