// Random ideas for the X86 backend.
//===---------------------------------------------------------------------===//
-We should add support for the "movbe" instruction, which does a byte-swapping
-copy (3-addr bswap + memory support?) This is available on Atom processors.
-
-//===---------------------------------------------------------------------===//
-
This should be one DIV/IDIV instruction, not a libcall:
unsigned test(unsigned long long X, unsigned Y) {
Some isel ideas:
-1. Dynamic programming based approach when compile time if not an
+1. Dynamic programming based approach when compile time is not an
issue.
2. Code duplication (addressing mode) during isel.
3. Other ideas from "Register-Sensitive Selection, Duplication, and
Take the following:
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128"
target triple = "i386-apple-darwin8"
@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2]
define fastcc void @abort_gzip() noreturn nounwind {
//===---------------------------------------------------------------------===//
-unsigned log2(unsigned x) {
- return x > 1 ? 32-__builtin_clz(x-1) : 0;
+unsigned t(unsigned a, unsigned b) {
+ return a <= b ? 5 : -5;
}
-generates (x86_64):
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- movl $63, %eax
- bsrl %edi, %ecx
- cmovel %eax, %ecx
- xorl $31, %ecx
- movl $32, %eax
- subl %ecx, %eax
-LBB0_2:
- ret
-
-The cmov and the early test are redundant:
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- bsrl %edi, %ecx
- xorl $31, %ecx
- movl $32, %eax
- subl %ecx, %eax
-LBB0_2:
- ret
-
-If we want to get really fancy we could use some two's complement magic:
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- bsrl %edi, %ecx
- xorl $-32, %ecx
- leal 33(%ecx), %eax
-LBB0_2:
- ret
-
-This is only useful on targets that can't encode the first operand of a sub
-directly. The rule is C1 - (X^C2) -> (C1+1) + (X^~C2).
+We generate:
+ movl $5, %ecx
+ cmpl %esi, %edi
+ movl $-5, %eax
+ cmovbel %ecx, %eax
+
+GCC:
+ cmpl %edi, %esi
+ sbbl %eax, %eax
+ andl $-10, %eax
+ addl $5, %eax
//===---------------------------------------------------------------------===//