Support v8f32 to v8i8/v8i16 conversion through custom lowering

[oota-llvm.git] / lib / Target / X86 / README.txt
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index d6ceaa7ed4b614ba60f606cb22aa1e7e4abfe71a..6a8a4fdf2520d81a8b1f5e9828f69cc6b2bcd024 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,11 +2,6 @@
  // Random ideas for the X86 backend.
  //===---------------------------------------------------------------------===//
  
-We should add support for the "movbe" instruction, which does a byte-swapping
-copy (3-addr bswap + memory support?)  This is available on Atom processors.
-
-//===---------------------------------------------------------------------===//
-
  This should be one DIV/IDIV instruction, not a libcall:
  
  unsigned test(unsigned long long X, unsigned Y) {
@@ -61,7 +56,7 @@ cmovs, we should expand to a conditional branch like GCC produces.
  
  Some isel ideas:
  
-1. Dynamic programming based approach when compile time if not an
+1. Dynamic programming based approach when compile time is not an
     issue.
  2. Code duplication (addressing mode) during isel.
  3. Other ideas from "Register-Sensitive Selection, Duplication, and
@@ -1222,7 +1217,7 @@ Also check why xmm7 is not used at all in the function.
  
  Take the following:
  
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128"
  target triple = "i386-apple-darwin8"
  @in_exit.4870.b = internal global i1 false             ; <i1*> [#uses=2]
  define fastcc void @abort_gzip() noreturn nounwind  {
@@ -2066,51 +2061,20 @@ The trick is to match "fetch_and_add(X, -C) == C".
  
  //===---------------------------------------------------------------------===//
  
-unsigned log2(unsigned x) {
-  return x > 1 ? 32-__builtin_clz(x-1) : 0;
+unsigned t(unsigned a, unsigned b) {
+  return a <= b ? 5 : -5;
  }
  
-generates (x86_64):
-       xorl    %eax, %eax
-       cmpl    $2, %edi
-       jb      LBB0_2
-## BB#1:
-       decl    %edi
-       movl    $63, %eax
-       bsrl    %edi, %ecx
-       cmovel  %eax, %ecx
-       xorl    $31, %ecx
-       movl    $32, %eax
-       subl    %ecx, %eax
-LBB0_2:
-       ret
-
-The cmov and the early test are redundant:
-       xorl    %eax, %eax
-       cmpl    $2, %edi
-       jb      LBB0_2
-## BB#1:
-       decl    %edi
-       bsrl    %edi, %ecx
-       xorl    $31, %ecx
-       movl    $32, %eax
-       subl    %ecx, %eax
-LBB0_2:
-       ret
-
-If we want to get really fancy we could use some two's complement magic:
-       xorl    %eax, %eax
-       cmpl    $2, %edi
-       jb      LBB0_2
-## BB#1:
-       decl    %edi
-       bsrl    %edi, %ecx
-       xorl    $-32, %ecx
-       leal    33(%ecx), %eax
-LBB0_2:
-       ret
-
-This is only useful on targets that can't encode the first operand of a sub
-directly.  The rule is C1 - (X^C2) -> (C1+1) + (X^~C2).
+We generate:
+       movl    $5, %ecx
+       cmpl    %esi, %edi
+       movl    $-5, %eax
+       cmovbel %ecx, %eax
+
+GCC:
+       cmpl    %edi, %esi
+       sbbl    %eax, %eax
+       andl    $-10, %eax
+       addl    $5, %eax
  
  //===---------------------------------------------------------------------===//