X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FREADME.txt;h=6a8a4fdf2520d81a8b1f5e9828f69cc6b2bcd024;hb=425e951734c3a0615e22ec94ffa51cc16ce6e483;hp=8237fbd094250af0315e0b7f200b985abd02e9eb;hpb=4c19b17a17ee1379b455f8dc4796af518dcb45e3;p=oota-llvm.git

diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 8237fbd0942..6a8a4fdf252 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,11 +2,6 @@
 // Random ideas for the X86 backend.
 //===---------------------------------------------------------------------===//
 
-We should add support for the "movbe" instruction, which does a byte-swapping
-copy (3-addr bswap + memory support?)  This is available on Atom processors.
-
-//===---------------------------------------------------------------------===//
-
 This should be one DIV/IDIV instruction, not a libcall:
 
 unsigned test(unsigned long long X, unsigned Y) {
@@ -61,7 +56,7 @@ cmovs, we should expand to a conditional branch like GCC produces.
 
 Some isel ideas:
 
-1. Dynamic programming based approach when compile time if not an
+1. Dynamic programming based approach when compile time is not an
    issue.
 2. Code duplication (addressing mode) during isel.
 3. Other ideas from "Register-Sensitive Selection, Duplication, and
@@ -1222,7 +1217,7 @@ Also check why xmm7 is not used at all in the function.
 
 Take the following:
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128"
 target triple = "i386-apple-darwin8"
 @in_exit.4870.b = internal global i1 false		; <i1*> [#uses=2]
 define fastcc void @abort_gzip() noreturn nounwind  {
@@ -1727,26 +1722,6 @@ are functionally identical.
 
 //===---------------------------------------------------------------------===//
 
-Take the following C code:
-int x(int y) { return (y & 63) << 14; }
-
-Code produced by gcc:
-	andl	$63, %edi
-	sall	$14, %edi
-	movl	%edi, %eax
-	ret
-
-Code produced by clang:
-	shll	$14, %edi
-	movl	%edi, %eax
-	andl	$1032192, %eax
-	ret
-
-The code produced by gcc is 3 bytes shorter.  This sort of construct often
-shows up with bitfields.
-
-//===---------------------------------------------------------------------===//
-
 Take the following C code:
 int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
 
@@ -2060,3 +2035,46 @@ _clamp2:                                ## @clamp2
 The move of 0 could be scheduled above the test to make it is xor reg,reg.
 
 //===---------------------------------------------------------------------===//
+
+GCC PR48986.  We currently compile this:
+
+void bar(void);
+void yyy(int* p) {
+    if (__sync_fetch_and_add(p, -1) == 1)
+      bar();
+}
+
+into:
+	movl	$-1, %eax
+	lock
+	xaddl	%eax, (%rdi)
+	cmpl	$1, %eax
+	je	LBB0_2
+
+Instead we could generate:
+
+	lock
+	dec %rdi
+	je LBB0_2
+
+The trick is to match "fetch_and_add(X, -C) == C".
+
+//===---------------------------------------------------------------------===//
+
+unsigned t(unsigned a, unsigned b) {
+  return a <= b ? 5 : -5;
+}
+
+We generate:
+	movl	$5, %ecx
+	cmpl	%esi, %edi
+	movl	$-5, %eax
+	cmovbel	%ecx, %eax
+
+GCC:
+	cmpl	%edi, %esi
+	sbbl	%eax, %eax
+	andl	$-10, %eax
+	addl	$5, %eax
+
+//===---------------------------------------------------------------------===//