we should pattern match the SSE complex arithmetic ops.

author Chris Lattner <sabre@nondot.org>

Wed, 25 Aug 2010 23:31:42 +0000 (23:31 +0000)

committer Chris Lattner <sabre@nondot.org>

Wed, 25 Aug 2010 23:31:42 +0000 (23:31 +0000)
author Chris Lattner <sabre@nondot.org>
Wed, 25 Aug 2010 23:31:42 +0000 (23:31 +0000)
committer Chris Lattner <sabre@nondot.org>
Wed, 25 Aug 2010 23:31:42 +0000 (23:31 +0000)
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt

index 85cdd17781985cc78558af9a88b3a78e07d58839..f96b22f1e204214aa1de06f4aabced0f4a94c078 100644 (file)
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -17,6 +17,32 @@ __m128i shift_right(__m128i value, unsigned long offset) {
                 _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset)));
  }
  
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers; we should pattern
+match them.  Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+  return A+B;
+}
+
+into:
+
+_f32:
+       movdqa  %xmm0, %xmm2
+       addss   %xmm1, %xmm2
+       pshufd  $16, %xmm2, %xmm2
+       pshufd  $1, %xmm1, %xmm1
+       pshufd  $1, %xmm0, %xmm0
+       addss   %xmm1, %xmm0
+       pshufd  $16, %xmm0, %xmm1
+       movdqa  %xmm2, %xmm0
+       unpcklps        %xmm1, %xmm0
+       ret
+
+seems silly when it could just be one addps.
+
+
  //===---------------------------------------------------------------------===//
  
  Expand libm rounding functions inline:  Significant speedups possible.
author	Chris Lattner <sabre@nondot.org>
	Wed, 25 Aug 2010 23:31:42 +0000 (23:31 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Wed, 25 Aug 2010 23:31:42 +0000 (23:31 +0000)