[x86] Added _addcarry_ and _subborrow_ intrinsics

[oota-llvm.git] / lib / Target / X86 / README.txt
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index f9c1d3574325eaf88a9081dabb18567953c4ba04..52d3c01076de2f69d27112c8e1e8c6caf5897bcc 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -123,7 +123,7 @@ flags.
  The instruction selector sometimes misses folding a load into a compare.  The
  pattern is written as (cmp reg, (load p)).  Because the compare isn't 
  commutative, it is not matched with the load on both sides.  The dag combiner
-should be made smart enough to cannonicalize the load into the RHS of a compare
+should be made smart enough to canonicalize the load into the RHS of a compare
  when it can invert the result of the compare for free.
  
  //===---------------------------------------------------------------------===//
@@ -1444,54 +1444,6 @@ it would be nice to produce "into" someday.
  
  //===---------------------------------------------------------------------===//
  
-This code:
-
-void vec_mpys1(int y[], const int x[], int scaler) {
-int i;
-for (i = 0; i < 150; i++)
- y[i] += (((long long)scaler * (long long)x[i]) >> 31);
-}
-
-Compiles to this loop with GCC 3.x:
-
-.L5:
-       movl    %ebx, %eax
-       imull   (%edi,%ecx,4)
-       shrdl   $31, %edx, %eax
-       addl    %eax, (%esi,%ecx,4)
-       incl    %ecx
-       cmpl    $149, %ecx
-       jle     .L5
-
-llvm-gcc compiles it to the much uglier:
-
-LBB1_1:        ## bb1
-       movl    24(%esp), %eax
-       movl    (%eax,%edi,4), %ebx
-       movl    %ebx, %ebp
-       imull   %esi, %ebp
-       movl    %ebx, %eax
-       mull    %ecx
-       addl    %ebp, %edx
-       sarl    $31, %ebx
-       imull   %ecx, %ebx
-       addl    %edx, %ebx
-       shldl   $1, %eax, %ebx
-       movl    20(%esp), %eax
-       addl    %ebx, (%eax,%edi,4)
-       incl    %edi
-       cmpl    $150, %edi
-       jne     LBB1_1  ## bb1
-
-The issue is that we hoist the cast of "scaler" to long long outside of the
-loop, the value comes into the loop as two values, and
-RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
-constructed BUILD_PAIR which represents the cast value.
-
-This can be handled by making CodeGenPrepare sink the cast.
-
-//===---------------------------------------------------------------------===//
-
  Test instructions can be eliminated by using EFLAGS values from arithmetic
  instructions. This is currently not done for mul, and, or, xor, neg, shl,
  sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
@@ -1567,43 +1519,6 @@ The first one is done for all AMDs, Core2, and "Generic"
  The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
    Core 2, and "Generic"
  
-//===---------------------------------------------------------------------===//
-
-Testcase:
-int a(int x) { return (x & 127) > 31; }
-
-Current output:
-       movl    4(%esp), %eax
-       andl    $127, %eax
-       cmpl    $31, %eax
-       seta    %al
-       movzbl  %al, %eax
-       ret
-
-Ideal output:
-       xorl    %eax, %eax
-       testl   $96, 4(%esp)
-       setne   %al
-       ret
-
-This should definitely be done in instcombine, canonicalizing the range
-condition into a != condition.  We get this IR:
-
-define i32 @a(i32 %x) nounwind readnone {
-entry:
-       %0 = and i32 %x, 127            ; <i32> [#uses=1]
-       %1 = icmp ugt i32 %0, 31                ; <i1> [#uses=1]
-       %2 = zext i1 %1 to i32          ; <i32> [#uses=1]
-       ret i32 %2
-}
-
-Instcombine prefers to strength reduce relational comparisons to equality
-comparisons when possible, this should be another case of that.  This could
-be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
-looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already
-be redesigned to use ComputeMaskedBits and friends.
-
-
  //===---------------------------------------------------------------------===//
  Testcase:
  int x(int a) { return (a&0xf0)>>4; }
@@ -2060,3 +1975,21 @@ Instead we could generate:
  The trick is to match "fetch_and_add(X, -C) == C".
  
  //===---------------------------------------------------------------------===//
+
+unsigned t(unsigned a, unsigned b) {
+  return a <= b ? 5 : -5;
+}
+
+We generate:
+       movl    $5, %ecx
+       cmpl    %esi, %edi
+       movl    $-5, %eax
+       cmovbel %ecx, %eax
+
+GCC generates:
+       cmpl    %edi, %esi
+       sbbl    %eax, %eax
+       andl    $-10, %eax
+       addl    $5, %eax
+
+//===---------------------------------------------------------------------===//