X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FREADME.txt;h=b4285a0718799c379eb2a281f159d2c1a42784ed;hb=df1087061c58c1734efd363715f221ac7d426cbb;hp=560947a4a04b79882b77d5994a0eab6842eed9c0;hpb=5d4718b1c780aa7f393202307791dd7cfaaca1f0;p=oota-llvm.git diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 560947a4a04..b4285a07187 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -2,11 +2,6 @@ // Random ideas for the X86 backend. //===---------------------------------------------------------------------===// -We should add support for the "movbe" instruction, which does a byte-swapping -copy (3-addr bswap + memory support?) This is available on Atom processors. - -//===---------------------------------------------------------------------===// - This should be one DIV/IDIV instruction, not a libcall: unsigned test(unsigned long long X, unsigned Y) { @@ -61,7 +56,7 @@ cmovs, we should expand to a conditional branch like GCC produces. Some isel ideas: -1. Dynamic programming based approach when compile time if not an +1. Dynamic programming based approach when compile time is not an issue. 2. Code duplication (addressing mode) during isel. 3. Other ideas from "Register-Sensitive Selection, Duplication, and @@ -1222,7 +1217,7 @@ Also check why xmm7 is not used at all in the function. Take the following: -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128" target triple = "i386-apple-darwin8" @in_exit.4870.b = internal global i1 false ; [#uses=2] define fastcc void @abort_gzip() noreturn nounwind { @@ -1572,43 +1567,6 @@ The first one is done for all AMDs, Core2, and "Generic" The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" -//===---------------------------------------------------------------------===// - -Testcase: -int a(int x) { return (x & 127) > 31; } - -Current output: - movl 4(%esp), %eax - andl $127, %eax - cmpl $31, %eax - seta %al - movzbl %al, %eax - ret - -Ideal output: - xorl %eax, %eax - testl $96, 4(%esp) - setne %al - ret - -This should definitely be done in instcombine, canonicalizing the range -condition into a != condition. We get this IR: - -define i32 @a(i32 %x) nounwind readnone { -entry: - %0 = and i32 %x, 127 ; [#uses=1] - %1 = icmp ugt i32 %0, 31 ; [#uses=1] - %2 = zext i1 %1 to i32 ; [#uses=1] - ret i32 %2 -} - -Instcombine prefers to strength reduce relational comparisons to equality -comparisons when possible, this should be another case of that. This could -be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it -looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already -be redesigned to use ComputeMaskedBits and friends. - - //===---------------------------------------------------------------------===// Testcase: int x(int a) { return (a&0xf0)>>4; } @@ -2066,3 +2024,20 @@ The trick is to match "fetch_and_add(X, -C) == C". //===---------------------------------------------------------------------===// +unsigned t(unsigned a, unsigned b) { + return a <= b ? 5 : -5; +} + +We generate: + movl $5, %ecx + cmpl %esi, %edi + movl $-5, %eax + cmovbel %ecx, %eax + +GCC: + cmpl %edi, %esi + sbbl %eax, %eax + andl $-10, %eax + addl $5, %eax + +//===---------------------------------------------------------------------===//