X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FREADME-MMX.txt;h=a6c8616b6d2c6b77a897de92552ef3111b20d25b;hb=d9e3385ced2dc887e2fe8e1c071bd2611e4d3ede;hp=b4886aed2739af8d1a9893ab005aa1820e801863;hpb=a348c56fdee38b4d52c4e54ca9d8bea799dda345;p=oota-llvm.git diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt index b4886aed273..a6c8616b6d2 100644 --- a/lib/Target/X86/README-MMX.txt +++ b/lib/Target/X86/README-MMX.txt @@ -4,56 +4,68 @@ //===---------------------------------------------------------------------===// -We should compile +This: #include <mmintrin.h> -extern __m64 C; - -void baz(__v2si *A, __v2si *B) -{ - *A = __builtin_ia32_psllq(*B, C); - _mm_empty(); +__v2si qux(int A) { + return (__v2si){ 0, A }; } -to: +is compiled into: + +_qux: + subl $28, %esp + movl 32(%esp), %eax + movd %eax, %mm0 + movq %mm0, (%esp) + movl (%esp), %eax + movl %eax, 20(%esp) + movq %mm0, 8(%esp) + movl 12(%esp), %eax + movl %eax, 16(%esp) + movq 16(%esp), %mm0 + addl $28, %esp + ret + +Yuck! 
+ +GCC gives us: + +_qux: + subl $12, %esp + movl 16(%esp), %eax + movl 20(%esp), %edx + movl $0, (%eax) + movl %edx, 4(%eax) + addl $12, %esp + ret $4 + +//===---------------------------------------------------------------------===// + +We generate crappy code for this: + +__m64 t() { + return _mm_cvtsi32_si64(1); +} -.globl _baz -_baz: - call L3 -"L00000000001$pb": -L3: - popl %ecx +_t: subl $12, %esp - movl 20(%esp), %eax - movq (%eax), %mm0 - movl L_C$non_lazy_ptr-"L00000000001$pb"(%ecx), %eax - movq (%eax), %mm1 - movl 16(%esp), %eax - psllq %mm1, %mm0 - movq %mm0, (%eax) - emms + movl $1, %eax + movd %eax, %mm0 + movq %mm0, (%esp) + movl (%esp), %eax + movl 4(%esp), %edx addl $12, %esp ret -not: - -_baz: - subl $12, %esp - call "L1$pb" -"L1$pb": - popl %eax - movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax - movl (%eax), %ecx - movl %ecx, (%esp) - movl 4(%eax), %eax - movl %eax, 4(%esp) - movl 20(%esp), %eax - movq (%eax), %mm0 - movq (%esp), %mm1 - psllq %mm1, %mm0 - movl 16(%esp), %eax - movq %mm0, (%eax) - emms - addl $12, %esp +The extra stack traffic is covered in the previous entry. The other reason +is that we are not smart about materializing constants in MMX registers. With -m64: + + movl $1, %eax + movd %eax, %mm0 + movd %mm0, %rax ret + +We should be using a constantpool load instead: + movq LC0(%rip), %rax