Fix some simple copy-paste errors in MBlaze ASM Parser and Makefile.

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index 5032b2bbbb5b172cd25dc47c986592a7d24c9406..1f69ffb09c0a4a04298f9e84b91acd2b3c06b1ec 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -870,11 +870,6 @@ rshift_gt (unsigned int a)
     bar ();
  }
  
-void neg_eq_cst(unsigned int a) {
-if (-a == 123)
-bar();
-}
-
  All should simplify to a single comparison.  All of these are
  currently not optimized with "clang -emit-llvm-bc | opt
  -std-compile-opts".
@@ -1767,7 +1762,6 @@ case it choses instead to keep the max operation obvious.
  
  //===---------------------------------------------------------------------===//
  
-Switch lowering generates less than ideal code for the following switch:
  define void @a(i32 %x) nounwind {
  entry:
    switch i32 %x, label %if.end [
@@ -1788,19 +1782,15 @@ declare void @foo()
  Generated code on x86-64 (other platforms give similar results):
  a:
         cmpl    $5, %edi
-       ja      .LBB0_2
-       movl    %edi, %eax
-       movl    $47, %ecx
-       btq     %rax, %rcx
-       jb      .LBB0_3
+       ja      LBB2_2
+       cmpl    $4, %edi
+       jne     LBB2_3
  .LBB0_2:
         ret
  .LBB0_3:
         jmp     foo  # TAILCALL
  
-The movl+movl+btq+jb could be simplified to a cmpl+jne.
-
-Or, if we wanted to be really clever, we could simplify the whole thing to
+If we wanted to be really clever, we could simplify the whole thing to
  something like the following, which eliminates a branch:
         xorl    $1, %edi
         cmpl    $4, %edi
@@ -2259,78 +2249,112 @@ icmp transform.
  
  //===---------------------------------------------------------------------===//
  
-These functions:
-int foo(int *X) {
-  if ((*X & 255) == 47)
-    bar();
+This code:
+
+typedef struct {
+int f1:1;
+int f2:1;
+int f3:1;
+int f4:29;
+} t1;
+
+typedef struct {
+int f1:1;
+int f2:1;
+int f3:30;
+} t2;
+
+t1 s1;
+t2 s2;
+
+void func1(void)
+{
+s1.f1 = s2.f1;
+s1.f2 = s2.f2;
  }
-int foo2(int X) {
-  if ((X & 255) == 47)
-    bar();
+
+Compiles into this IR (on x86-64 at least):
+
+%struct.t1 = type { i8, [3 x i8] }
+@s2 = global %struct.t1 zeroinitializer, align 4
+@s1 = global %struct.t1 zeroinitializer, align 4
+define void @func1() nounwind ssp noredzone {
+entry:
+  %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4
+  %bf.val.sext5 = and i32 %0, 1
+  %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4
+  %2 = and i32 %1, -4
+  %3 = or i32 %2, %bf.val.sext5
+  %bf.val.sext26 = and i32 %0, 2
+  %4 = or i32 %3, %bf.val.sext26
+  store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4
+  ret void
  }
  
-codegen to:
+The two or/and's should be merged into one each.
  
-  movzbl       (%rdi), %eax
-  cmpl $47, %eax
-  jne  LBB0_2
+//===---------------------------------------------------------------------===//
  
-and:
-  movzbl       %dil, %eax
-  cmpl $47, %eax
-  jne  LBB1_2
+Machine level code hoisting can be useful in some cases.  For example, PR9408
+is about:
  
-If a dag combine shrunk the compare to a byte compare, then we'd fold the load
-in the first example, and eliminate the movzbl in the second, saving a register.
-This can be a target independent dag combine that works on ISD::SETCC, it would
-catch this before the legalize ops pass.
+typedef union {
+ void (*f1)(int);
+ void (*f2)(long);
+} funcs;
  
-//===---------------------------------------------------------------------===//
+void foo(funcs f, int which) {
+ int a = 5;
+ if (which) {
+   f.f1(a);
+ } else {
+   f.f2(a);
+ }
+}
  
-We should optimize this:
+which we compile to:
  
-  %tmp = load i16* %arrayidx, align 4, !tbaa !0
-  %A = trunc i16 %tmp to i8
-  %cmp = icmp eq i8 %A, 127
-  %B.mask = and i16 %tmp, -256
-  %cmp7 = icmp eq i16 %B.mask, 17664
-  %or.cond = and i1 %cmp, %cmp7
-  br i1 %or.cond, label %land.lhs.true9, label %if.end
+foo:                                    # @foo
+# BB#0:                                 # %entry
+       pushq   %rbp
+       movq    %rsp, %rbp
+       testl   %esi, %esi
+       movq    %rdi, %rax
+       je      .LBB0_2
+# BB#1:                                 # %if.then
+       movl    $5, %edi
+       callq   *%rax
+       popq    %rbp
+       ret
+.LBB0_2:                                # %if.else
+       movl    $5, %edi
+       callq   *%rax
+       popq    %rbp
+       ret
  
-into:
+Note that bb1 and bb2 are the same.  This doesn't happen at the IR level
+because one call is passing an i32 and the other is passing an i64.
  
-  %tmp = load i16* %arrayidx, align 4, !tbaa !0
-  %0 = icmp eq i16 %tmp, 17791
-  br i1 %0, label %land.lhs.true9, label %if.end
-
-with this patch:
-Index: InstCombine/InstCombineCompares.cpp
-===================================================================
---- InstCombine/InstCombineCompares.cpp        (revision 129500)
-+++ InstCombine/InstCombineCompares.cpp        (working copy)
-@@ -2506,6 +2506,18 @@
-         return &I;
-       }
-     }
-+    
-+    // Transform "icmp eq (trunc X), cst" to "icmp (and X, mask), cst"
-+    if (Op0->hasOneUse() && match(Op0, m_Trunc(m_Value(A))) &&
-+        isa<ConstantInt>(Op1)) {
-+      APInt MaskV = APInt::getLowBitsSet(A->getType()->getPrimitiveSizeInBits(),
-+                                      Op0->getType()->getPrimitiveSizeInBits());
-+      Value *Mask =
-+        Builder->CreateAnd(A, ConstantInt::get(A->getContext(), MaskV));
-+      return new ICmpInst(I.getPredicate(), Mask,
-+                          ConstantExpr::getZExt(cast<ConstantInt>(Op1),
-+                                                Mask->getType()));
-+    }
-   }
-   
-   {
+//===---------------------------------------------------------------------===//
+
+I see this sort of pattern in 176.gcc in a few places (e.g. the start of
+store_bit_field).  The rem should be replaced with a multiply and subtract:
  
+  %3 = sdiv i32 %A, %B
+  %4 = srem i32 %A, %B
  
-but we can't do that until the dag combine above is added.  Not having this
-is blocking resolving PR6627.
+Similarly for udiv/urem.  Note that this shouldn't be done on X86 or ARM,
+which can do this in a single operation (instruction or libcall).  It is
+probably best to do this in the code generator.
  
  //===---------------------------------------------------------------------===//
  
+unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; }
+should fold to (x & y) == 0.
+
+//===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
+should fold to x > y.
+
+//===---------------------------------------------------------------------===//