Remove an incorrect overaggressive optimization

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index a345d3ddb4147c4618144ad63a6c96d95a7061dc..aad621f440ac7c3f424b371eacbbf39539ac01b5 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -339,6 +339,8 @@ we don't have whole-function selection dags.  On x86, this means we use one
  extra register for the function when effective_addr2 is declared as U64 than
  when it is declared U32.
  
+PHI Slicing could be extended to do this.
+
  //===---------------------------------------------------------------------===//
  
  LSR should know what GPR types a target has.  This code:
@@ -406,22 +408,6 @@ return:            ; preds = %then.1, %else.0, %then.0
  
  //===---------------------------------------------------------------------===//
  
-Tail recursion elimination is not transforming this function, because it is
-returning n, which fails the isDynamicConstant check in the accumulator 
-recursion checks.
-
-long long fib(const long long n) {
-  switch(n) {
-    case 0:
-    case 1:
-      return n;
-    default:
-      return fib(n-1) + fib(n-2);
-  }
-}
-
-//===---------------------------------------------------------------------===//
-
  Tail recursion elimination should handle:
  
  int pow2m1(int n) {
@@ -1229,6 +1215,40 @@ GCC PR33344 is a similar case.
  
  //===---------------------------------------------------------------------===//
  
+[PHI TRANSLATE INDEXED GEPs]  PR5313
+
+Load redundancy elimination for simple loop.  This loop:
+
+void append_text(const char* text,unsigned char * const  io) {
+  while(*text)
+    *io=*text++;
+}
+
+Compiles to have a fully redundant load in the loop (%2):
+
+define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
+entry:
+  %0 = load i8* %text, align 1                    ; <i8> [#uses=1]
+  %1 = icmp eq i8 %0, 0                           ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %bb, %entry
+  %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ]  ; <i32> [#uses=2]
+  %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1]
+  %2 = load i8* %text_addr.04, align 1            ; <i8> [#uses=1]
+  store i8 %2, i8* %io, align 1
+  %tmp = add i32 %indvar, 1                       ; <i32> [#uses=2]
+  %scevgep = getelementptr i8* %text, i32 %tmp    ; <i8*> [#uses=1]
+  %3 = load i8* %scevgep, align 1                 ; <i8> [#uses=1]
+  %4 = icmp eq i8 %3, 0                           ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
+
+//===---------------------------------------------------------------------===//
+
  There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
  GCC testsuite.  There are many pre testcases as ssa-pre-*.c
  
@@ -1594,12 +1614,6 @@ int int_char(char m) {if(m>7) return 0; return m;}
  
  //===---------------------------------------------------------------------===//
  
-IPSCCP is propagating elements of first class aggregates, but is not propagating
-the entire aggregate itself.  This leads it to miss opportunities, for example
-in test/Transforms/SCCP/ipsccp-basic.ll:test5b.
-
-//===---------------------------------------------------------------------===//
-
  int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; }
  
  Generates this:
@@ -1668,3 +1682,55 @@ entry:
  }
  
  //===---------------------------------------------------------------------===//
+
+IPSCCP does not currently propagate argument dependent constants through
+functions where it does not not all of the callers.  This includes functions
+with normal external linkage as well as templates, C99 inline functions etc.
+Specifically, it does nothing to:
+
+define i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %0 = add nsw i32 %y, %z                         
+  %1 = mul i32 %0, %x                             
+  %2 = mul i32 %y, %z                             
+  %3 = add nsw i32 %1, %2                         
+  ret i32 %3
+}
+
+define i32 @test2() nounwind {
+entry:
+  %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind
+  ret i32 %0
+}
+
+It would be interesting extend IPSCCP to be able to handle simple cases like
+this, where all of the arguments to a call are constant.  Because IPSCCP runs
+before inlining, trivial templates and inline functions are not yet inlined.
+The results for a function + set of constant arguments should be memoized in a
+map.
+
+//===---------------------------------------------------------------------===//
+
+The libcall constant folding stuff should be moved out of SimplifyLibcalls into
+libanalysis' constantfolding logic.  This would allow IPSCCP to be able to
+handle simple things like this:
+
+static int foo(const char *X) { return strlen(X); }
+int bar() { return foo("abcd"); }
+
+//===---------------------------------------------------------------------===//
+
+InstCombine should use SimplifyDemandedBits to remove the or instruction:
+
+define i1 @test(i8 %x, i8 %y) {
+  %A = or i8 %x, 1
+  %B = icmp ugt i8 %A, 3
+  ret i1 %B
+}
+
+Currently instcombine calls SimplifyDemandedBits with either all bits or just
+the sign bit, if the comparison is obviously a sign test. In this case, we only
+need all but the bottom two bits from %A, and if we gave that mask to SDB it
+would delete the or instruction for us.
+
+//===---------------------------------------------------------------------===//