+//===---------------------------------------------------------------------===//
+
+We should inline lrintf and probably other libc functions.
+
+//===---------------------------------------------------------------------===//
+
+Start using the flags more. For example, compile:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x += y) == 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+ addl %esi, (%rdi)
+ movl %edx, %eax
+ cmovne %ecx, %eax
+ ret
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ movl %esi, (%rdi)
+ testl %esi, %esi
+ cmove %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+and:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x + y) < 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+
+add_zf:
+ addl (%rdi), %esi
+ movl %edx, %eax
+ cmovns %ecx, %eax
+ ret
+
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ testl %esi, %esi
+ cmovs %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This:
+#include <math.h>
+int foo(double X) { return isnan(X); }
+
+compiles to (-m64):
+
+_foo:
+ pxor %xmm1, %xmm1
+ ucomisd %xmm1, %xmm0
+ setp %al
+ movzbl %al, %eax
+ ret
+
+the pxor is not needed, we could compare the value against itself.
+
+//===---------------------------------------------------------------------===//
+
+These two functions have identical effects:
+
+unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
+unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
+
+We currently compile them to:
+
+_f:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ movl 8(%esp), %edx
+ cmpl %edx, %ecx
+ jne LBB1_2 #UnifiedReturnBlock
+LBB1_1: #cond_true
+ addl $2, %eax
+ ret
+LBB1_2: #UnifiedReturnBlock
+ movl %ecx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ cmpl 8(%esp), %ecx
+ sete %cl
+ movzbl %cl, %ecx
+ leal 1(%ecx,%eax), %eax
+ ret
+
+both of which are inferior to GCC's:
+
+_f:
+ movl 4(%esp), %edx
+ leal 1(%edx), %eax
+ addl $2, %edx
+ cmpl 8(%esp), %eax
+ cmove %edx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ addl $1, %eax
+ xorl %edx, %edx
+ cmpl 8(%esp), %eax
+ sete %dl
+ addl %edx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void test(int X) {
+ if (X) abort();
+}
+
+is currently compiled to:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne LBB1_1
+ addl $12, %esp
+ ret
+LBB1_1:
+ call L_abort$stub
+
+It would be better to produce:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne L_abort$stub
+ addl $12, %esp
+ ret
+
+This can be applied to any no-return function call that takes no arguments etc.
+Alternatively, the stack save/restore logic could be shrink-wrapped, producing
+something like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ subl $12, %esp
+ call L_abort$stub
+
+Both are useful in different situations. Finally, it could be shrink-wrapped
+and tail called, like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ pop %eax # realign stack.
+ call L_abort$stub
+
+Though this probably isn't worth it.
+
+//===---------------------------------------------------------------------===//
+
+We need to teach the codegen to convert two-address INC instructions to LEA
+when the flags are dead. For example, on X86-64, compile:
+
+int foo(int A, int B) {
+ return A+1;
+}
+
+to:
+
+_foo:
+ leal 1(%edi), %eax
+ ret
+
+instead of:
+
+_foo:
+ incl %edi
+ movl %edi, %eax
+ ret
+
+Another example is:
+
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y. Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered. However, this copy is not needed if the register
+;; allocator turns the shift into an LEA. This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: not grep {mov E.X, E.X}
+
+%G = external global int
+
+int %test1(int %X, int %Y) {
+ %Z = add int %X, %Y
+ volatile store int %Y, int* %G
+ volatile store int %Z, int* %G
+ ret int %X
+}
+
+int %test2(int %X) {
+ %Z = add int %X, 1 ;; inc
+ volatile store int %Z, int* %G
+ ret int %X
+}
+
+//===---------------------------------------------------------------------===//
+
+This:
+#include <xmmintrin.h>
+unsigned test(float f) {
+ return _mm_cvtsi128_si32( (__m128i) _mm_set_ss( f ));
+}
+
+Compiles to:
+_test:
+ movss 4(%esp), %xmm0
+ movd %xmm0, %eax
+ ret
+
+it should compile to a move from the stack slot directly into eax. DAGCombine
+has this xform, but it is currently disabled until the alignment fields of
+the load/store nodes are trustworthy.
+
+//===---------------------------------------------------------------------===//
+
+Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with
+a neg instead of a sub instruction. Consider:
+
+int test(char X) { return 7-X; }
+
+we currently produce:
+_test:
+ movl $7, %eax
+ movsbl 4(%esp), %ecx
+ subl %ecx, %eax
+ ret
+
+We would use one fewer register if codegen'd as:
+
+ movsbl 4(%esp), %eax
+ neg %eax
+ add $7, %eax
+ ret
+
+Note that this isn't beneficial if the load can be folded into the sub. In
+this case, we want a sub:
+
+int test(int X) { return 7-X; }
+_test:
+ movl $7, %eax
+ subl 4(%esp), %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+For code like:
+phi (undef, x)
+
+We get an implicit def on the undef side. If the phi is spilled, we then get:
+implicitdef xmm1
+store xmm1 -> stack
+
+It should be possible to teach the x86 backend to "fold" the store into the
+implicitdef, which just deletes the implicit def.
+
+These instructions should go away:
+#IMPLICIT_DEF %xmm1
+movaps %xmm1, 192(%esp)
+movaps %xmm1, 224(%esp)
+movaps %xmm1, 176(%esp)
+
+//===---------------------------------------------------------------------===//
+
+This is a "commutable two-address" register coallescing deficiency:
+
+define <4 x float> @test1(<4 x float> %V) {
+entry:
+ %tmp8 = shufflevector <4 x float> %V, <4 x float> undef, <4 x i32> < i32 3, i32 2, i32 1, i32 0 > ; <<4 x float>> [#uses=1]
+ %add = add <4 x float> %tmp8, %V ; <<4 x float>> [#uses=1]
+ ret <4 x float> %add
+}
+
+this codegens to:
+
+_test1:
+ pshufd $27, %xmm0, %xmm1
+ addps %xmm0, %xmm1
+ movaps %xmm1, %xmm0
+ ret
+
+instead of:
+
+_test1:
+ pshufd $27, %xmm0, %xmm1
+ addps %xmm1, %xmm0
+ ret
+