[AArch64] Fix FMLS scalar-indexed-from-2s-after-neg patterns.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Fri, 14 Aug 2015 22:06:05 +0000 (22:06 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Fri, 14 Aug 2015 22:06:05 +0000 (22:06 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Fri, 14 Aug 2015 22:06:05 +0000 (22:06 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Fri, 14 Aug 2015 22:06:05 +0000 (22:06 +0000)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index 349431eec9b377fc8c2dbd82f5305eef02c72c60..77293726f50ed1817a20918a69c4f65e69f690be 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4438,7 +4438,9 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
              (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                  V128:$Rm, VectorIndexS:$idx)>;
    def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
-                         (vector_extract (v2f32 (fneg V64:$Rm)),
+                         (vector_extract (v4f32 (insert_subvector undef,
+                                                    (v2f32 (fneg V64:$Rm)),
+                                                    (i32 0))),
                                           VectorIndexS:$idx))),
              (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                  (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll

index 869966caa3ae3ee7ae446f2ad293d27ab03e22c2..985b5bf483acd7ee242966357f83776d080b8c06 100644 (file)
--- a/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -535,6 +535,17 @@ entry:
  
  declare double @llvm.fma.f64(double, double, double)
  
+define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmss_lane_f32
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %extract.rhs = extractelement <2 x float> %v, i32 1  ; take lane 1 of the 2 x f32 vector
+  %extract = fsub float -0.000000e+00, %extract.rhs  ; scalar negate (-0.0 - x), done after the extract
+  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)  ; %b * (-lane) + %a
+  ret float %0
+}
+
  define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_vfmss_laneq_f32
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
@@ -557,6 +568,50 @@ entry:
    ret double %0
  }
  
+define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmsd_lane_f64_0
+; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v  ; vector negate (-0.0 - v) before the extract
+  %tmp1 = extractelement <1 x double> %tmp0, i32 0  ; only lane of the 1 x f64 vector
+  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)  ; %b * (-lane) + %a
+  ret double %0
+}
+
+define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmss_lane_f32_0
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v  ; vector negate (-0.0 - v) before the extract
+  %tmp1 = extractelement <2 x float> %tmp0, i32 1  ; take lane 1 of the negated 2 x f32 vector
+  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)  ; %b * (-lane) + %a
+  ret float %0
+}
+
+define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmss_laneq_f32_0
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v  ; vector negate (-0.0 - v) before the extract
+  %tmp1 = extractelement <4 x float> %tmp0, i32 3  ; take lane 3 of the negated 4 x f32 vector
+  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)  ; %b * (-lane) + %a
+  ret float %0
+}
+
+define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsd_laneq_f64_0
+; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v  ; vector negate (-0.0 - v) before the extract
+  %tmp1 = extractelement <2 x double> %tmp0, i32 1  ; take lane 1 of the negated 2 x f64 vector
+  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)  ; %b * (-lane) + %a
+  ret double %0
+}
+
  define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
  ; CHECK-LABEL: test_vmlal_lane_s16:
  ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Fri, 14 Aug 2015 22:06:05 +0000 (22:06 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Fri, 14 Aug 2015 22:06:05 +0000 (22:06 +0000)
lib/Target/AArch64/AArch64InstrInfo.td		patch \| blob \| history
test/CodeGen/AArch64/arm64-neon-2velem.ll		patch \| blob \| history