vml[as].f32 cause stalls in following advanced SIMD instructions. Avoid using them for single-precision scalar FP for now.

author Jim Grosbach <grosbach@apple.com>

Sat, 31 Oct 2009 22:57:36 +0000 (22:57 +0000)

committer Jim Grosbach <grosbach@apple.com>

Sat, 31 Oct 2009 22:57:36 +0000 (22:57 +0000)
author Jim Grosbach <grosbach@apple.com>
Sat, 31 Oct 2009 22:57:36 +0000 (22:57 +0000)
committer Jim Grosbach <grosbach@apple.com>
Sat, 31 Oct 2009 22:57:36 +0000 (22:57 +0000)
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 822950c52836ed2111275efe483f89f0c25162e9..d22ec353fd03cc4a0c879c46e80d2b15c6bfd55f 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2841,13 +2841,16 @@ def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
  def : N3VDsPat<fmul, VMULfd_sfp>;
  
  // Vector Multiply-Accumulate/Subtract used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
-def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
+// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
  
-let neverHasSideEffects = 1 in
-def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
-def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+//let neverHasSideEffects = 1 in
+//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+
+//let neverHasSideEffects = 1 in
+//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
  
  // Vector Absolute used for single-precision FP
  let neverHasSideEffects = 1 in
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll

index 1a1cd0747b49891452157e046b96c19b41375a11..5c31ea641de49b0187333d1f3474079dc440b7b1 100644 (file)
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,7 +1,7 @@
  ; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
  ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
  ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
  
  define float @test(float %acc, float %a, float %b) {
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll

index 969eb7274670a8a1b46d16a8dccef07244243c14..8fc13e78bc30c334b66b214d11ac242d1cb916df 100644 (file)
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -7,8 +7,10 @@ entry:
  ; VFP2: fnmacs
  ; NEON: fnmacs
  
-; NEONFP:     vmls
+; NEONFP-NOT: vmls
  ; NEONFP-NOT: fcpys
+; NEONFP:     vmul.f32
+; NEONFP:     vsub.f32
  ; NEONFP:     fmrs
  
         %0 = fmul float %a, %b
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll

index ea88a2680f356dc9da7ecfc5034ee641d317d58f..4320328e9c102de56ef286b0575292c885218bc6 100644 (file)
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 5
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
  
  define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
  entry:
author	Jim Grosbach <grosbach@apple.com>
	Sat, 31 Oct 2009 22:57:36 +0000 (22:57 +0000)
committer	Jim Grosbach <grosbach@apple.com>
	Sat, 31 Oct 2009 22:57:36 +0000 (22:57 +0000)
lib/Target/ARM/ARMInstrNEON.td		patch | blob | history
test/CodeGen/ARM/fmacs.ll		patch | blob | history
test/CodeGen/ARM/fnmacs.ll		patch | blob | history
test/CodeGen/Thumb2/cross-rc-coalescing-2.ll		patch | blob | history