From: Chris Lattner
Date: Sun, 5 Sep 2010 20:22:09 +0000 (+0000)
Subject: update this.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f0f5780b39f332ac14b0d85b904126dab7c783c3;p=oota-llvm.git

update this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113116 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index f96b22f1e20..b2116e03b14 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) {
 //===---------------------------------------------------------------------===//
 
 SSE has instructions for doing operations on complex numbers, we should pattern
-match them. Compiling this:
+match them. For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+  return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32: ## @f32
+	pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0]
+	addss %xmm0, %xmm1
+	pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0]
+	movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1]
+	movaps %xmm0, %xmm3
+	addss %xmm1, %xmm3
+	movdqa %xmm2, %xmm0
+	addss %xmm3, %xmm0
+	ret
+
+Also, there are cases where some simple local SLP would improve codegen a bit.
+compiling this:
 
 _Complex float f32(_Complex float A, _Complex float B) {
   return A+B;
@@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) {
 
 into:
 
-_f32:
+_f32: ## @f32
 	movdqa %xmm0, %xmm2
 	addss %xmm1, %xmm2
-	pshufd $16, %xmm2, %xmm2
-	pshufd $1, %xmm1, %xmm1
-	pshufd $1, %xmm0, %xmm0
-	addss %xmm1, %xmm0
-	pshufd $16, %xmm0, %xmm1
-	movdqa %xmm2, %xmm0
-	unpcklps %xmm1, %xmm0
+	pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0]
+	pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0]
+	addss %xmm1, %xmm3
+	movaps %xmm2, %xmm0
+	unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
 	ret
 
-seems silly.
+seems silly when it could just be one addps.
 
 
 //===---------------------------------------------------------------------===//