Apply the SSE dependence idiom for SSE unary operations to SD instructions too, in addition to SS instructions, and add a comment explaining it.

author Dan Gohman <gohman@apple.com>

Mon, 12 Jul 2010 20:46:04 +0000 (20:46 +0000)

committer Dan Gohman <gohman@apple.com>

Mon, 12 Jul 2010 20:46:04 +0000 (20:46 +0000)
author Dan Gohman <gohman@apple.com>
Mon, 12 Jul 2010 20:46:04 +0000 (20:46 +0000)
committer Dan Gohman <gohman@apple.com>
Mon, 12 Jul 2010 20:46:04 +0000 (20:46 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 8e5f37c867b540e9022dd686305986bc383d678f..0d5d1b449e7dca2fc72e21692b8e4bd161116731 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1937,6 +1937,10 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
    def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                  !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                  [(set FR32:$dst, (OpNode FR32:$src))]>;
+  // For scalar unary operations, fold a load into the operation
+  // only in OptForSize mode. It eliminates an instruction, but it also
+  // eliminates a whole-register clobber (the load), so it introduces a
+  // partial register update condition.
    def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                  !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                  [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
@@ -1992,9 +1996,11 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
    def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                  !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                  [(set FR64:$dst, (OpNode FR64:$src))]>;
-  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+  // See the comments in sse1_fp_unop_s for why this is OptForSize.
+  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                  !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+                [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+            Requires<[HasSSE2, OptForSize]>;
    def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                      [(set VR128:$dst, (F64Int VR128:$src))]>;
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll

index 027d2f1dafc1da321954c338bf4038bd743a7e65..094cbc7bdefc07ec4dfac3df6001278540deb409 100644 (file)
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -19,3 +19,44 @@ entry:
    %1 = fptrunc double %0 to float
    ret float %1
  }
+
+define float @squirtf(float* %x) nounwind {
+entry:
+; CHECK: squirtf:
+; CHECK: movss (%rdi), %xmm0
+; CHECK: sqrtss %xmm0, %xmm0
+  %z = load float* %x
+  %t = call float @llvm.sqrt.f32(float %z)
+  ret float %t
+}
+
+define double @squirt(double* %x) nounwind {
+entry:
+; CHECK: squirt:
+; CHECK: movsd (%rdi), %xmm0
+; CHECK: sqrtsd %xmm0, %xmm0
+  %z = load double* %x
+  %t = call double @llvm.sqrt.f64(double %z)
+  ret double %t
+}
+
+define float @squirtf_size(float* %x) nounwind optsize {
+entry:
+; CHECK: squirtf_size:
+; CHECK: sqrtss (%rdi), %xmm0
+  %z = load float* %x
+  %t = call float @llvm.sqrt.f32(float %z)
+  ret float %t
+}
+
+define double @squirt_size(double* %x) nounwind optsize {
+entry:
+; CHECK: squirt_size:
+; CHECK: sqrtsd (%rdi), %xmm0
+  %z = load double* %x
+  %t = call double @llvm.sqrt.f64(double %z)
+  ret double %t
+}
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
author	Dan Gohman <gohman@apple.com>
	Mon, 12 Jul 2010 20:46:04 +0000 (20:46 +0000)
committer	Dan Gohman <gohman@apple.com>
	Mon, 12 Jul 2010 20:46:04 +0000 (20:46 +0000)
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/break-sse-dep.ll		patch | blob | history